// Build with: clang++ --std=c++20 -g -O1 schedtest.cpp
#include <array>
#include <span>
#include <memory>
#include <list>
#include <functional>
#include <iostream>
#include <map>
#include <chrono>
#include <exception>
#include <atomic>
#include <set>
#include <vector>
#include <optional>
#include <algorithm> // std::find
#include <cstdint>   // int64_t, uint64_t
#include <stdexcept> // std::runtime_error

#if defined(__x86_64__)
#if defined(__GNUG__)
constexpr bool is_x86_64_linux = true;
#else
#if defined(__clang__)
constexpr bool is_x86_64_linux = true;
#else
constexpr bool is_x86_64_linux = false;
#endif
#endif
#else
constexpr bool is_x86_64_linux = false;
#endif

static_assert(is_x86_64_linux, "This code probably only works on x86_64 GNU Extensions C++");
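
// platform_data holds the callee-saved register state of the System V x86-64 ABI
// (rbx, r12-r15) plus the stack and base pointers for one process. pull() captures
// the current CPU context into the struct; push() loads that context back so
// execution continues on the saved stack, and returns `location` to whatever
// resumes there.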
struct platform_data {
    platform_data() = default;

    platform_data(std::span<char> stack_str)
    : stack_ptr(&*(stack_str.end()-16))
    , base_ptr(stack_ptr)
    {}

    uint64_t rbx, r12, r13, r14, r15;

    void* stack_ptr;
    void* base_ptr;

    __attribute__((always_inline)) void pull()
    {
        __asm__ __volatile__(
            "movq %%rsp, %0\n"
            "movq %%rbp, %1\n"
            "movq %%rbx, %2\n"
            "movq %%r12, %3\n"
            "movq %%r13, %4\n"
            "movq %%r14, %5\n"
            "movq %%r15, %6\n"
            : "=m"(stack_ptr)
            , "=m"(base_ptr)
            , "=m"(rbx)
            , "=m"(r12)
            , "=m"(r13)
            , "=m"(r14)
            , "=m"(r15)
        );
    }

    __attribute__((always_inline)) void* push(void* location)
    {
        volatile void* volatile tmp = static_cast<char*>(stack_ptr) - sizeof(void*);
        *static_cast<volatile void* volatile * volatile>(tmp) = location;
        __asm__ __volatile__(
            "movq %1, %%rsp\n"
            "movq %2, %%rbp\n"
            "movq %3, %%rbx\n"
            "movq %4, %%r12\n"
            "movq %5, %%r13\n"
            "movq %6, %%r14\n"
            "movq %7, %%r15\n"
            "popq %0\n"
            : "+r"(location)
            : "m"(tmp)
            , "m"(base_ptr)
            , "m"(rbx)
            , "m"(r12)
            , "m"(r13)
            , "m"(r14)
            , "m"(r15)
            : "memory"
        );
        return location;
    }
};

enum class process_status {
    inactive = 0,
    running = 1,
    waiting = 2,
    finished = 3,
    zombie = 4
};
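
// A process is a cooperatively scheduled green thread: it owns a heap-allocated
// stack, the function it should run, a saved register context and a numeric id.
// The second constructor wraps an already running stack (used for main()) and
// therefore owns nothing.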
struct process {
    static int64_t counter;

    char* stack;
    size_t sz;
    platform_data scheduling_swapper;
    process_status state = process_status::inactive;
    std::function<void()> fn;
    int64_t t_id;

    process(std::function<void()> _fn, size_t _sz = 16384)
    : stack(new char[_sz])
    , sz(_sz)
    , scheduling_swapper(std::span<char>(stack, sz))
    , fn(_fn)
    , t_id(counter++)
    {}

    process(char* stack_base)
    : stack(stack_base)
    , sz(0)
    , t_id(counter++)
    {}

    process(const process&) = delete;

    ~process() {
        if(sz) delete[] stack;
    }
};

int64_t process::counter = 0;
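
// spawner() is the trampoline that runs a process's function the first time it is
// scheduled; system is the cooperative scheduler itself. It keeps FIFO lists of
// runnable, waiting and misbehaving ("naughty") processes and swaps register
// contexts in yield(), yield_to() and steal_and_yield().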
__attribute__((noinline)) struct system* spawner (struct system* sys);

struct system {
    static system sys;

    std::list<std::unique_ptr<process>>
        running,
        waiting,
        naughty;

    std::unique_ptr<process> previous;
    std::unique_ptr<process> current;

    // Pop the next runnable process (the run queue is FIFO: push_front, take back).
    std::unique_ptr<process> one() {
        auto v = std::move(running.back());
        running.pop_back();
        return v;
    }

    // File a process back into the list that matches its state.
    void rid(std::unique_ptr<process> current) {
        switch(current->state) {
        case process_status::inactive:
        case process_status::running:
            running.push_front(std::move(current));
            break;
        case process_status::finished:
            clean(std::move(current));
            break;
        case process_status::zombie:
            naughty.push_front(std::move(current));
            break;
        case process_status::waiting:
            waiting.push_front(std::move(current));
            break;
        }
    }

    void clean(std::unique_ptr<process>) {}

    void yield_to(std::unique_ptr<process> target) noexcept {
        current->scheduling_swapper.pull();
        sys.rid(std::move(current));
        current = std::move(target);
        current->scheduling_swapper.push(this);
        spawner(&sys);
    }

    void yield() noexcept {
        current->scheduling_swapper.pull();
        sys.rid(std::move(current));
        current = one();
        current->scheduling_swapper.push(this);
        spawner(&sys);
    }

    // Like yield(), but hands the current process to `func` instead of requeueing it.
    template<typename fn>
    void steal_and_yield(fn func) noexcept {
        current->scheduling_swapper.pull();
        func(std::move(current));
        current = one();
        current->scheduling_swapper.push(this);
        spawner(&sys);
    }
};
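
// A rough usage sketch (see main() below for the real thing): wrap the running
// thread in a process so the scheduler has something to switch back to, then
// hand it lambdas to run as green threads:
//
//     system::sys.current = std::make_unique<process>(&some_stack_byte);
//     system::sys.yield_to(std::make_unique<process>([]{ /* runs on its own stack */ }));
//
// `some_stack_byte` is just a stand-in name here; main() uses a local char.
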
// Needs to return the system one way or another
__attribute__((noinline)) struct system* spawner (struct system* sys) {
    auto& proc = *system::sys.current;
    if(proc.state == process_status::inactive) {
        proc.state = process_status::running;
        proc.fn();
        proc.state = process_status::finished;
        sys->current->scheduling_swapper.pull();
        sys->yield();
    }
    return sys;
}

struct system system::sys;

/********************* ***********************/
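
// dirty_bottleneck is a minimal test-and-set spinlock built on std::atomic_bool.
// lock() busy-waits; unlock() throws if the flag was not actually held, which
// catches double unlocks. It protects the deadlock detector's own bookkeeping.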
class dirty_bottleneck {
    std::atomic_bool flag;

    [[nodiscard]] bool try_lock() {
        bool f = false;
        bool t = true;
        return flag.compare_exchange_strong(f,t,std::memory_order::acquire);
    }

    [[nodiscard]] bool try_unlock() {
        bool f = false;
        bool t = true;
        return flag.compare_exchange_strong(t,f,std::memory_order::release);
    }
public:
    dirty_bottleneck() = default;
    dirty_bottleneck(dirty_bottleneck&) = delete;
    dirty_bottleneck(dirty_bottleneck&&) = delete;

    void lock() {
        while(not try_lock());
    }

    void unlock() {
        if(!try_unlock()) throw std::runtime_error("Unlocking failed in dirty_bottleneck: potential double unlocking issue");
    }
};

/********************* ***********************/
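
// A tiny RAII guard in the spirit of std::lock_guard: locks on construction,
// unlocks on destruction.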
template<typename T>
class lock_guard {
    T& ref;
public:
    lock_guard(T& _ref)
    : ref(_ref)
    {
        ref.lock();
    }

    ~lock_guard() {
        ref.unlock();
    }
};

/********************* ***********************/
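
// Bookkeeping vocabulary for the deadlock detector: every fast_bottleneck gets a
// numeric mutex_handle, threads report their locking/waiting/unlocking transitions
// via signal_locking(), and mutexes report their creation and removal via
// mutex_state_update().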
using mutex_handle = size_t;

enum class thread_state {
    locking,
    waiting,
    unlocking
};

enum class mutex_state {
    remove = 0,
    create = 1
};

using namespace std::chrono_literals;

void mutex_state_update(mutex_handle, mutex_state);
void signal_locking(thread_state state, mutex_handle mtx, int64_t thrd);
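
// fast_bottleneck is the scheduler-aware mutex. On contention the current process
// reports itself as waiting, parks itself on the mutex's waiting list and yields;
// unlock() moves one parked process back onto the scheduler's run queue. Every
// transition is fed to the deadlock detector, which throws if it finds a cycle.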
class fast_bottleneck {

    /** This is a secret tool that will help us later **/
    static std::atomic<size_t> counter;
    const mutex_handle handle;
    dirty_bottleneck trigger_lock;
    std::list<std::unique_ptr<process>> waiting;

    std::atomic_bool flag;

    [[nodiscard]] bool try_lock() {
        bool f = false;
        bool t = true;
        return flag.compare_exchange_strong(f,t,std::memory_order::acquire);
    }

    [[nodiscard]] bool try_unlock() {
        bool f = false;
        bool t = true;
        return flag.compare_exchange_strong(t,f,std::memory_order::release);
    }
public:
    fast_bottleneck()
    : handle(counter.fetch_add(1)) //< We have to initialize that
    , flag()
    {
        mutex_state_update(handle, mutex_state::create);
    }
    fast_bottleneck(fast_bottleneck&) = delete;
    fast_bottleneck(fast_bottleneck&&) = delete;
    fast_bottleneck& operator=(fast_bottleneck&) = delete;
    fast_bottleneck& operator=(fast_bottleneck&&) = delete;

    ~fast_bottleneck() {
        mutex_state_update(handle, mutex_state::remove);
    }

    void lock() {
        /// The exponential backoff variables
        constexpr std::chrono::milliseconds max{1};
        std::chrono::nanoseconds wait{256};
        while(not try_lock()) {
            /// The implementation of our little trick when waiting
            signal_locking(thread_state::waiting, handle, system::sys.current->t_id);

            system::sys.steal_and_yield([&](std::unique_ptr<process> p){
                lock_guard triggers(trigger_lock);
                p->state = process_status::waiting;
                waiting.push_front(std::move(p));
            });

            /// The exponential backoff
            //std::this_thread::sleep_for(wait);
            wait += wait < max ? std::chrono::nanoseconds(wait.count()/2) : 0ns;
        }
        /// The implementation of our little trick when locking
        signal_locking(thread_state::locking, handle, system::sys.current->t_id);
    }

    void unlock() {
        if(!try_unlock()) throw std::runtime_error("Unlocking failed in fast_bottleneck: potential double unlocking issue");
        /// The implementation of our little trick when unlocking
        signal_locking(thread_state::unlocking, handle, system::sys.current->t_id);

        {
            lock_guard triggers(trigger_lock);
            if(waiting.size()) {
                system::sys.running.push_front(std::move(waiting.back()));
                waiting.pop_back();
            }
        }
    }
};
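
// Global state for the deadlock detector: which mutexes each process currently
// owns, which mutex (if any) it is waiting on, and the set of mutexes that exist.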
dirty_bottleneck checker_lock;
dirty_bottleneck lister_lock;
std::map<int64_t, std::vector<mutex_handle>> owned_locks;
std::map<int64_t, std::optional<mutex_handle>> waiting_locks;
std::set<mutex_handle> locks_that_exist;

void mutex_state_update(mutex_handle mtx, mutex_state state) {
    lock_guard lister(lister_lock);
    switch(state) {
    case mutex_state::create: {
        locks_that_exist.insert(mtx);
    } break;
    case mutex_state::remove: {
        locks_that_exist.erase(mtx);
    } break;
    }
}
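
// Builds a wait-for graph between mutexes: for every waiting thread, each mutex it
// owns gets an edge towards the mutex it is waiting on. Nodes with no outgoing
// edges are then stripped repeatedly; any nodes that survive form a cycle, and a
// deadlock is reported by returning true.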
bool build_dependency_graph (
    const mutex_handle mtx,
    const int64_t thrd,
    std::map<int64_t, std::vector<mutex_handle>>& owned_locks,
    std::map<int64_t, std::optional<mutex_handle>>& waiting_locks
) {
    std::map<mutex_handle, std::set<mutex_handle>> graph;
    for(auto& elem : waiting_locks) {
        if(elem.second.has_value()) {
            for(auto& n : owned_locks[elem.first]) {
                graph[n].insert(elem.second.value());
            }
        }
    }

    std::set<mutex_handle> nodes;
    {
        lock_guard lister(lister_lock);
        nodes = locks_that_exist;
    }

    bool happened = true;

    while(happened) {
        happened = false;
        for(auto& n : nodes) {
            if(graph[n].size() == 0)
            {
                happened = true;
                // Take the entries by reference so erasing the edges actually sticks.
                for(auto& v : graph) {
                    v.second.erase(n);
                }
                nodes.erase(n);
                break;
            }
        }
    }

    // Any node left over is part of a cycle: report a deadlock.
    return not nodes.empty();
}

void signal_locking(thread_state state, mutex_handle mtx, int64_t thrd) {
    bool bad = false;
    {
        lock_guard checker(checker_lock);
        switch(state) {
        case thread_state::locking: {
            waiting_locks[thrd].reset();
            owned_locks[thrd].push_back(mtx);
        } break;
        case thread_state::unlocking: {
            auto it = std::find(owned_locks[thrd].begin(), owned_locks[thrd].end(), mtx);
            if(it != owned_locks[thrd].end()) {
                owned_locks[thrd].erase(it);
            }
        } break;
        case thread_state::waiting: {
            waiting_locks[thrd] = mtx;
            bad = build_dependency_graph(mtx, thrd, owned_locks, waiting_locks);
        } break;
        }
    }
    if(bad) throw std::runtime_error("Deadlock detected");
}

std::atomic<size_t> fast_bottleneck::counter;

/********************* ***********************/

fast_bottleneck A;
fast_bottleneck B;
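
// A small demo: main()'s own stack is wrapped in a process so the scheduler can
// switch back to it, then two short-lived processes contend with it for lock A,
// exercising the park/yield path and the deadlock checker along the way.
// B is not used in this demo.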
int main() {
    char c;
    system::sys.current = std::make_unique<process>(&c);

    std::cout << "1" << std::endl;
    A.lock();
    system::sys.current->state = process_status::running;
    system::sys.yield_to(std::make_unique<process>([](){
        A.lock();
        std::cout << "A" << std::endl;
        A.unlock();
    }));
    A.unlock();
    system::sys.yield();
    system::sys.yield_to(std::make_unique<process>([](){
        A.lock();
        std::cout << "B" << std::endl;
        A.unlock();
    }));
    std::cout << "3" << std::endl;
    return 0;
}