Created
January 25, 2021 09:45
-
-
Save Morwenn/3f46497aa0a5e45afb6824f08bdca197 to your computer and use it in GitHub Desktop.
Tentative to get a better swap_ranges (but hits optimizer issues)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
template<typename ForwardIterator1, typename ForwardIterator2> | |
auto swap_ranges_impl(std::false_type, | |
ForwardIterator1 first1, ForwardIterator1 last1, | |
ForwardIterator2 first2) | |
-> ForwardIterator2 | |
{ | |
return std::swap_ranges(first1, last1, first2); | |
} | |
template<typename RandomAccessIterator1, typename RandomAccessIterator2> | |
auto swap_ranges_impl(std::true_type, | |
RandomAccessIterator1 __restrict first1, RandomAccessIterator1 __restrict last1, | |
RandomAccessIterator2 __restrict first2) | |
-> RandomAccessIterator2 | |
{ | |
using value_type = remove_cvref_t<value_type_t<RandomAccessIterator1>>; | |
using difference_type = difference_type_t<RandomAccessIterator1>; | |
constexpr std::size_t cacheline_size = 64; | |
constexpr std::size_t storage_size = cacheline_size / sizeof(value_type); | |
// When possible, swap elements chunk by chunk via a cacheline-sized buffer | |
// this seems to improve the speed of the algorithm for some types | |
alignas(cacheline_size) value_type tmp[storage_size]; | |
auto size = last1 - first1; | |
while (size >= difference_type(storage_size)) { | |
detail::move(first1, first1 + storage_size, tmp); | |
first1 = detail::move(first2, first2 + storage_size, first1); | |
first2 = detail::move(tmp, tmp + storage_size, first2); | |
size -= storage_size; | |
} | |
return std::swap_ranges(first1, last1, first2); | |
} | |
//template<typename T> void show_type(); | |
template<typename ForwardIterator1, typename ForwardIterator2> | |
auto swap_ranges(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2) | |
-> ForwardIterator2 | |
{ | |
using value_type = remove_cvref_t<value_type_t<ForwardIterator1>>; | |
using category1 = iterator_category_t<ForwardIterator1>; | |
using category2 = iterator_category_t<ForwardIterator2>; | |
// Compute the number of elements that can be stored in a cache line | |
constexpr std::size_t cacheline_size = 64; | |
constexpr std::size_t storage_size = cacheline_size / sizeof(value_type); | |
// All the conditions required to batch element swaps | |
using can_optimize = std::integral_constant<bool, | |
std::is_base_of<std::random_access_iterator_tag, category1>::value && | |
std::is_base_of<std::random_access_iterator_tag, category2>::value && | |
std::is_trivial<value_type>::value && | |
(storage_size >= 4) | |
>; | |
return swap_ranges_impl(can_optimize{}, first1, last1, first2); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment