Skip to content

Instantly share code, notes, and snippets.

@Morwenn
Created January 25, 2021 09:45
Show Gist options
  • Save Morwenn/3f46497aa0a5e45afb6824f08bdca197 to your computer and use it in GitHub Desktop.
Save Morwenn/3f46497aa0a5e45afb6824f08bdca197 to your computer and use it in GitHub Desktop.
Tentative to get a better swap_ranges (but hits optimizer issues)
template<typename ForwardIterator1, typename ForwardIterator2>
auto swap_ranges_impl(std::false_type,
ForwardIterator1 first1, ForwardIterator1 last1,
ForwardIterator2 first2)
-> ForwardIterator2
{
return std::swap_ranges(first1, last1, first2);
}
template<typename RandomAccessIterator1, typename RandomAccessIterator2>
auto swap_ranges_impl(std::true_type,
RandomAccessIterator1 __restrict first1, RandomAccessIterator1 __restrict last1,
RandomAccessIterator2 __restrict first2)
-> RandomAccessIterator2
{
using value_type = remove_cvref_t<value_type_t<RandomAccessIterator1>>;
using difference_type = difference_type_t<RandomAccessIterator1>;
constexpr std::size_t cacheline_size = 64;
constexpr std::size_t storage_size = cacheline_size / sizeof(value_type);
// When possible, swap elements chunk by chunk via a cacheline-sized buffer
// this seems to improve the speed of the algorithm for some types
alignas(cacheline_size) value_type tmp[storage_size];
auto size = last1 - first1;
while (size >= difference_type(storage_size)) {
detail::move(first1, first1 + storage_size, tmp);
first1 = detail::move(first2, first2 + storage_size, first1);
first2 = detail::move(tmp, tmp + storage_size, first2);
size -= storage_size;
}
return std::swap_ranges(first1, last1, first2);
}
//template<typename T> void show_type();
template<typename ForwardIterator1, typename ForwardIterator2>
auto swap_ranges(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2)
-> ForwardIterator2
{
using value_type = remove_cvref_t<value_type_t<ForwardIterator1>>;
using category1 = iterator_category_t<ForwardIterator1>;
using category2 = iterator_category_t<ForwardIterator2>;
// Compute the number of elements that can be stored in a cache line
constexpr std::size_t cacheline_size = 64;
constexpr std::size_t storage_size = cacheline_size / sizeof(value_type);
// All the conditions required to batch element swaps
using can_optimize = std::integral_constant<bool,
std::is_base_of<std::random_access_iterator_tag, category1>::value &&
std::is_base_of<std::random_access_iterator_tag, category2>::value &&
std::is_trivial<value_type>::value &&
(storage_size >= 4)
>;
return swap_ranges_impl(can_optimize{}, first1, last1, first2);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment