Last active
September 19, 2020 13:49
-
-
Save leifwalsh/10009591 to your computer and use it in GitHub Desktop.
mcs lock: userspace c++11 implementation of http://lwn.net/Articles/590243/. seems to be awful---far worse than normal spinlock---with nthreads > ncpus
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# clang version 3.4 (tags/RELEASE_34/final) | |
# Intel(R) Core(TM) i5-3470 CPU @ 3.20GHz | |
spinlock 1: 3925ms | |
spinlock 2: 25665ms | |
spinlock 3: 27722ms | |
spinlock 4: 42727ms | |
std::mutex 1: 5526ms | |
std::mutex 2: 17672ms | |
std::mutex 3: 31038ms | |
std::mutex 4: 36890ms | |
mcs_lock 1: 4014ms | |
mcs_lock 2: 23112ms | |
mcs_lock 3: 26940ms | |
mcs_lock 4: 55371ms | |
# gcc version 4.8.2 20140206 (prerelease) (GCC) | |
# Intel(R) Core(TM) i5-3470 CPU @ 3.20GHz | |
spinlock 1: 5954ms | |
spinlock 2: 23907ms | |
spinlock 3: 29344ms | |
spinlock 4: 39561ms | |
std::mutex 1: 5549ms | |
std::mutex 2: 25904ms | |
std::mutex 3: 40386ms | |
std::mutex 4: 45023ms | |
mcs_lock 1: 4450ms | |
mcs_lock 2: 30693ms | |
mcs_lock 3: 27023ms | |
mcs_lock 4: 39660ms | |
# clang version 3.4 (tags/RELEASE_34/final) | |
# Intel(R) Core(TM) i5-4300U CPU @ 1.90GHz | |
spinlock 1: 4426ms | |
spinlock 2: 36351ms | |
spinlock 3: 42095ms | |
spinlock 4: 74889ms | |
std::mutex 1: 7989ms | |
std::mutex 2: 23520ms | |
std::mutex 3: 32154ms | |
std::mutex 4: 36576ms | |
mcs_lock 1: 4979ms | |
mcs_lock 2: 36937ms | |
mcs_lock 3: 29988ms | |
mcs_lock 4: 32646ms | |
# gcc version 4.8.2 20140206 (prerelease) (GCC) | |
# Intel(R) Core(TM) i5-4300U CPU @ 1.90GHz | |
spinlock 1: 7193ms | |
spinlock 2: 12639ms | |
spinlock 3: 117277ms | |
spinlock 4: 121082ms | |
std::mutex 1: 7672ms | |
std::mutex 2: 23599ms | |
std::mutex 3: 32680ms | |
std::mutex 4: 35891ms | |
mcs_lock 1: 4883ms | |
mcs_lock 2: 36233ms | |
mcs_lock 3: 29314ms | |
mcs_lock 4: 34758ms | |
# Apple LLVM version 5.1 (clang-503.0.38) (based on LLVM 3.4svn) | |
# Intel(R) Core(TM) i7-2677M CPU @ 1.80GHz | |
# (turned n down from 1<<28 to 1<<24) | |
spinlock 1: 342ms | |
spinlock 2: 3474ms | |
spinlock 3: 3848ms | |
spinlock 4: 3667ms | |
std::mutex 1: 1307ms | |
std::mutex 2: 53748ms | |
std::mutex 3: 53613ms | |
std::mutex 4: 52825ms | |
mcs_lock 1: 344ms | |
mcs_lock 2: 749ms | |
mcs_lock 3: 1497ms | |
mcs_lock 4: 1928ms | |
# g++-4.8 (Ubuntu/Linaro 4.8.1-10ubuntu9) 4.8.1 | |
# Intel(R) Xeon(R) CPU E5540 @ 2.53GHz | |
spinlock 1: 5967ms | |
spinlock 2: 26993ms | |
spinlock 3: 29944ms | |
spinlock 4: 33079ms | |
spinlock 5: 44120ms | |
spinlock 6: 52264ms | |
spinlock 7: 61463ms | |
spinlock 8: 67250ms | |
spinlock 9: 72514ms | |
spinlock 10: 85820ms | |
spinlock 11: 89884ms | |
spinlock 12: 98427ms | |
spinlock 13: 105303ms | |
spinlock 14: 110160ms | |
spinlock 15: 114706ms | |
spinlock 16: 116581ms | |
std::mutex 1: 4907ms | |
std::mutex 2: 14258ms | |
std::mutex 3: 36275ms | |
std::mutex 4: 41975ms | |
std::mutex 5: 40615ms | |
std::mutex 6: 36456ms | |
std::mutex 7: 35691ms | |
std::mutex 8: 29196ms | |
std::mutex 9: 27997ms | |
std::mutex 10: 26738ms | |
std::mutex 11: 26207ms | |
std::mutex 12: 25737ms | |
std::mutex 13: 25337ms | |
std::mutex 14: 24937ms | |
std::mutex 15: 24852ms | |
std::mutex 16: 24480ms | |
mcs_lock 1: 4447ms | |
mcs_lock 2: 103555ms | |
mcs_lock 3: 88553ms | |
mcs_lock 4: 67063ms | |
mcs_lock 5: 68666ms | |
mcs_lock 6: 65370ms | |
mcs_lock 7: 61647ms | |
mcs_lock 8: 73226ms | |
mcs_lock 9: 69561ms | |
mcs_lock 10: 68885ms | |
mcs_lock 11: 75992ms | |
mcs_lock 12: 60612ms | |
mcs_lock 13: 77011ms | |
mcs_lock 14: 75698ms | |
mcs_lock 15: 71876ms | |
mcs_lock 16: 62400ms |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
COMPILER=g++ | |
#COMPILER=g++-4.8 | |
#COMPILER=clang++ | |
CXX=$(COMPILER) -std=c++11 | |
# osx: | |
#CXX=$(COMPILER) -std=c++11 -stdlib=libc++ | |
CXXFLAGS=-Wall -Werror -g -O2 | |
LDFLAGS=-pthread -lpthread | |
# ubuntu is insane: | |
#LDFLAGS=-pthread -lpthread -Wl,--no-as-needed | |
all: mcs_lock_test | |
mcs_lock_test: mcs_lock_test.cpp mcs_lock.hpp | |
$(CXX) $(CXXFLAGS) $(LDFLAGS) $< -o $@ | |
clean: | |
$(RM) mcs_lock_test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <atomic> | |
// http://lwn.net/Articles/590243/ | |
class mcs_lock { | |
public: | |
class scoped_lock; | |
private: | |
std::atomic<scoped_lock *> _tail; | |
public: | |
mcs_lock(const mcs_lock&) = delete; | |
mcs_lock& operator=(const mcs_lock&) = delete; | |
mcs_lock() : _tail(nullptr) {} | |
class scoped_lock { | |
mcs_lock &_lock; | |
scoped_lock * volatile _next; | |
volatile bool _owned; | |
public: | |
scoped_lock(const scoped_lock&) = delete; | |
scoped_lock& operator=(const scoped_lock&) = delete; | |
scoped_lock(mcs_lock &lock) : _lock(lock), _next(nullptr), _owned(false) { | |
scoped_lock *tail = _lock._tail.exchange(this); | |
if (tail != nullptr) { | |
tail->_next = this; | |
while (!_owned) { | |
// spin me right round | |
} | |
} | |
} | |
~scoped_lock() { | |
scoped_lock *tail = this; | |
if (!_lock._tail.compare_exchange_strong(tail, nullptr)) { | |
while (_next == nullptr) { | |
// baby right round | |
} | |
_next->_owned = true; | |
} | |
} | |
}; | |
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "mcs_lock.hpp" | |
#include <atomic> | |
class spin_lock { | |
public: | |
class scoped_lock; | |
private: | |
std::atomic<scoped_lock *> _lock; | |
public: | |
spin_lock(const spin_lock&) = delete; | |
spin_lock& operator=(const spin_lock&) = delete; | |
spin_lock() : _lock(nullptr) {} | |
class scoped_lock { | |
spin_lock &_lock; | |
public: | |
scoped_lock(const scoped_lock&) = delete; | |
scoped_lock& operator=(const scoped_lock&) = delete; | |
scoped_lock(spin_lock &lock) : _lock(lock) { | |
scoped_lock *expect = nullptr; | |
while (!_lock._lock.compare_exchange_weak(expect, this)) { | |
expect = nullptr; | |
} | |
} | |
~scoped_lock() { | |
_lock._lock = nullptr; | |
} | |
}; | |
}; | |
template<class mutex, class lock_object> | |
void spinner(mutex *m, int *val, std::size_t n) { | |
for (std::size_t i = 0; i < n; ++i) { | |
lock_object lk(*m); | |
(*val)++; | |
} | |
} | |
#include <chrono> | |
template<typename Clock> | |
class timer { | |
std::chrono::time_point<Clock> _t0; | |
public: | |
timer() : _t0(Clock::now()) {} | |
timer(const timer&) = delete; | |
timer& operator=(const timer&) = delete; | |
void reset() { _t0 = Clock::now(); } | |
operator std::chrono::milliseconds() const { | |
return std::chrono::duration_cast<std::chrono::milliseconds>(Clock::now() - _t0); | |
} | |
}; | |
#include <algorithm> | |
#include <functional> | |
#include <iostream> | |
#include <mutex> | |
#include <string> | |
#include <thread> | |
#include <vector> | |
template<class mutex, class lock_object> | |
void test(const std::string &name, std::size_t nthreads) { | |
std::vector<std::thread> threads; | |
mutex m; | |
int i = 0; | |
timer<std::chrono::high_resolution_clock> t; | |
std::generate_n(std::back_inserter(threads), nthreads, | |
[&m, &i, nthreads]() { return std::move(std::thread(&spinner<mutex, lock_object>, &m, &i, (1<<28) / nthreads)); }); | |
std::for_each(threads.begin(), threads.end(), std::mem_fn(&std::thread::join)); | |
std::cout << name << " " << nthreads << ":\t" << static_cast<std::chrono::milliseconds>(t).count() << "ms" << std::endl; | |
} | |
int main(void) { | |
for (std::size_t i = 1; i <= std::thread::hardware_concurrency(); ++i) { | |
test<spin_lock, spin_lock::scoped_lock>("spinlock", i); | |
} | |
for (std::size_t i = 1; i <= std::thread::hardware_concurrency(); ++i) { | |
test<std::mutex, std::unique_lock<std::mutex> >("std::mutex", i); | |
} | |
for (std::size_t i = 1; i <= std::thread::hardware_concurrency(); ++i) { | |
test<mcs_lock, mcs_lock::scoped_lock>("mcs_lock", i); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment