Last active
April 12, 2020 21:13
-
-
Save pparuzel/d7c9b708239dcffad5c2bbae33d6a9ae to your computer and use it in GitHub Desktop.
Sparse set implementation in Modern C++ 17
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <memory> // for sparse array | |
#include <string> // string manipulation | |
#include <vector> // for dense array | |
////////////////////////// | |
// SPARSE SET // | |
// time complexity // | |
// of operations // | |
//----------------------// | |
// insert -- O(1) // | |
// remove -- O(1) // | |
// search -- O(1) // | |
// clear -- O(n) // | |
//----------------------// | |
// union -- O(n) // | |
// intersection -- O(n) // | |
// difference -- O(n) // | |
// subset -- O(n) // | |
//----------------------// | |
// sort -- O(nlogn) // | |
////////////////////////// | |
class sparse_set; | |
namespace detail | |
{ | |
template <typename T> | |
std::unique_ptr<T> make_unique_uninitialized(const std::size_t size) | |
{ | |
return std::unique_ptr<T>(new typename std::remove_extent<T>::type[size]); | |
} | |
template <typename T> | |
struct sparse_array_ptr { | |
public: | |
explicit sparse_array_ptr() = default; | |
explicit sparse_array_ptr(std::size_t upper_bound) | |
: data_(make_unique_uninitialized<T[]>(upper_bound)), size_(upper_bound) | |
{ | |
} | |
sparse_array_ptr(const sparse_array_ptr& other) | |
: data_(make_unique_uninitialized<T[]>(other.size()), size_(other.size())) | |
{ | |
std::copy_n(other.data_.get(), other.size(), data_.get()); | |
} | |
sparse_array_ptr(sparse_array_ptr&& other) noexcept | |
: data_(std::move(other.data_)), size_(other.size()) | |
{ | |
other.size_ = 0; | |
} | |
sparse_array_ptr& operator=(sparse_array_ptr copy) | |
{ | |
swap(*this, copy); | |
return *this; | |
} | |
~sparse_array_ptr() | |
{ | |
size_ = 0; | |
} | |
friend void swap(sparse_array_ptr& s1, sparse_array_ptr& s2) noexcept | |
{ | |
using std::swap; | |
swap(s1.size_, s2.size_); | |
swap(s1.data_, s2.data_); | |
} | |
void resize(std::size_t new_size) | |
{ | |
if (new_size > size_) { | |
auto buffer = make_unique_uninitialized<T[]>(new_size); | |
std::copy_n(data_.get(), size_, buffer.get()); | |
std::swap(data_, buffer); | |
size_ = new_size; | |
} | |
} | |
[[nodiscard]] std::size_t size() const noexcept | |
{ | |
return size_; | |
} | |
T& operator[](std::size_t index) noexcept | |
{ | |
return data_[index]; | |
} | |
const T& operator[](std::size_t index) const noexcept | |
{ | |
return data_[index]; | |
} | |
private: | |
std::unique_ptr<T[]> data_{nullptr}; | |
std::size_t size_{0}; | |
}; | |
} // namespace detail | |
class sparse_set | |
{ | |
public: | |
using value_type = std::size_t; | |
using dense_container_t = std::vector<value_type>; | |
using iterator = dense_container_t::iterator; | |
using size_type = dense_container_t::size_type; | |
using sparse_container_t = detail::sparse_array_ptr<size_type>; | |
public: | |
sparse_set() = default; | |
explicit sparse_set(size_type max_value) : sparse_(max_value + 1) | |
{ | |
} | |
explicit sparse_set(std::vector<value_type> dense) : dense_(std::move(dense)) | |
{ | |
fix_sparse_range(dense); | |
fix_sparse_indices(); | |
} | |
sparse_set(const sparse_set& other) | |
{ | |
insert_from_values(other.dense_); | |
} | |
sparse_set(sparse_set&& other) = default; | |
sparse_set(std::initializer_list<value_type> init) noexcept | |
{ | |
insert_from_values(init); | |
} | |
sparse_set& operator=(sparse_set copy) noexcept | |
{ | |
swap(*this, copy); | |
return *this; | |
} | |
sparse_set& operator=(std::vector<value_type> dense) noexcept | |
{ | |
dense_ = std::move(dense); | |
fix_sparse_range(dense); | |
fix_sparse_indices(); | |
return *this; | |
} | |
~sparse_set() = default; | |
friend void swap(sparse_set& s1, sparse_set& s2) noexcept | |
{ | |
using std::swap; | |
swap(s1.dense_, s2.dense_); | |
swap(s1.sparse_, s2.sparse_); | |
} | |
[[nodiscard]] bool empty() const noexcept | |
{ | |
return dense_.empty(); | |
} | |
[[nodiscard]] size_type size() const noexcept | |
{ | |
return dense_.size(); | |
} | |
[[nodiscard]] size_type extent() const noexcept | |
{ | |
return sparse_.size(); | |
} | |
void insert(value_type value) noexcept | |
{ | |
if (value > sparse_.size() - 1) { | |
reserve_value(std::max(sparse_.size() * 2, value)); | |
} else if (contains(value)) { | |
return; | |
} | |
sparse_[value] = dense_.size(); | |
dense_.push_back(value); | |
} | |
[[nodiscard]] bool contains(value_type value) const noexcept | |
{ | |
return value < sparse_.size() && sparse_[value] < dense_.size() && | |
dense_[sparse_[value]] == value; | |
} | |
void remove(value_type value) noexcept | |
{ | |
if (!contains(value)) { | |
return; | |
} | |
const auto index = sparse_[value]; | |
dense_[index] = dense_.back(); | |
sparse_[dense_.back()] = index; | |
dense_.pop_back(); | |
} | |
void clear() noexcept | |
{ | |
dense_.clear(); | |
} | |
[[nodiscard]] size_type find(value_type value) const noexcept | |
{ | |
if (!contains(value)) { | |
return -1; | |
} | |
return sparse_[value]; | |
} | |
void reserve_value(value_type value) | |
{ | |
const auto proposed_size = value + 1; | |
if (proposed_size < sparse_.size()) { | |
dense_.erase(remove_if(dense_.begin(), dense_.end(), | |
[value](auto&& k) { return k > value; }), | |
dense_.end()); | |
} | |
sparse_.resize(proposed_size); | |
} | |
void merge(sparse_set& other) noexcept | |
{ | |
insert_from_values(std::move(other.dense_)); | |
} | |
void merge(sparse_set&& other) noexcept | |
{ | |
insert_from_values(std::move(other.dense_)); | |
} | |
void merge(std::initializer_list<value_type> init_list) noexcept | |
{ | |
insert_from_values(init_list); | |
} | |
auto intersection(const sparse_set& other) noexcept | |
{ | |
return intersection_from_values(other.dense_); | |
} | |
auto intersection(std::initializer_list<value_type> init_list) noexcept | |
{ | |
return intersection_from_values(init_list); | |
} | |
auto difference(const sparse_set& other) const noexcept | |
{ | |
sparse_set result(sparse_.size()); | |
for (auto&& value : dense_) { | |
if (!other.contains(value)) { | |
result.insert(value); | |
} | |
} | |
return result; | |
} | |
bool is_subset_of(const sparse_set& superset) const noexcept | |
{ | |
return all_of(std::cbegin(dense_), std::cend(dense_), | |
[&superset](const auto& value) { return superset.contains(value); }); | |
} | |
bool operator==(const sparse_set& other) const noexcept | |
{ | |
return is_subset_of(other) && other.is_subset_of(*this); | |
} | |
bool operator!=(const sparse_set& other) const noexcept | |
{ | |
return !is_subset_of(other) || !other.is_subset_of(*this); | |
} | |
template <bool Ascending = true> | |
void sort() noexcept | |
{ | |
if constexpr (Ascending) { | |
std::sort(std::begin(dense_), std::end(dense_)); | |
} else { | |
std::sort(std::rbegin(dense_), std::rend(dense_)); | |
} | |
fix_sparse_indices(); | |
} | |
value_type operator[](std::size_t i) const noexcept | |
{ | |
return dense_[i]; | |
} | |
// Only for ranged-based for-loops and immutating traversal | |
[[nodiscard]] auto begin() const noexcept | |
{ | |
return dense_.cbegin(); | |
} | |
[[nodiscard]] auto cbegin() const noexcept | |
{ | |
return dense_.cbegin(); | |
} | |
[[nodiscard]] auto end() const noexcept | |
{ | |
return dense_.cend(); | |
} | |
[[nodiscard]] auto cend() const noexcept | |
{ | |
return dense_.cend(); | |
} | |
[[nodiscard]] auto rbegin() const noexcept | |
{ | |
return dense_.crbegin(); | |
} | |
[[nodiscard]] auto crbegin() const noexcept | |
{ | |
return dense_.crbegin(); | |
} | |
[[nodiscard]] auto rend() const noexcept | |
{ | |
return dense_.crend(); | |
} | |
[[nodiscard]] auto crend() const noexcept | |
{ | |
return dense_.crend(); | |
} | |
template <char Sep = ',', char Space = ' '> | |
std::string str() const noexcept | |
{ | |
std::string result = "{"; | |
; | |
char comma[] = {0, Space, 0}; | |
for (auto&& k : dense_) { | |
result += comma; | |
result += std::to_string(k); | |
comma[0] = Sep; | |
} | |
result += '}'; | |
return result; | |
} | |
private: | |
void fix_sparse_indices() noexcept | |
{ | |
for (size_type i{}; i < dense_.size(); ++i) { | |
sparse_[dense_[i]] = i; | |
} | |
} | |
template <typename Values> | |
sparse_set intersection_from_values(Values&& values) const noexcept | |
{ | |
sparse_set result; | |
for (auto&& value : std::forward<Values>(values)) { | |
if (contains(value)) { | |
result.insert(value); | |
} | |
} | |
return result; | |
} | |
template <typename Values> | |
void insert_from_values(Values&& values) noexcept | |
{ | |
fix_sparse_range(values); | |
for (auto&& k : std::forward<Values>(values)) { | |
insert(k); | |
} | |
} | |
template <typename Container> | |
void fix_sparse_range(Container&& cont) | |
{ | |
if (cont.size() == 0) { | |
return; | |
} | |
const auto iter_max = std::max_element(cont.begin(), cont.end()); | |
sparse_.resize(*iter_max + 1); | |
} | |
void fix_sparse_range(const sparse_set& other) | |
{ | |
sparse_.resize(other.sparse_.size()); | |
} | |
private: | |
dense_container_t dense_{}; | |
sparse_container_t sparse_{1}; | |
}; | |
#include <iostream> // for debugging and helper functions | |
int main() | |
{ | |
boolalpha(std::cout); // print boolean as string not int | |
sparse_set s(20); | |
s.insert(302); | |
assert(s.extent() == 303); | |
s.remove(301); | |
s.insert(4); | |
assert(s.size() == 2 && s.find(4) == 1); | |
s.insert(4); | |
s.insert(2); | |
s.insert(5); | |
assert(s.size() == 4 && s.find(2) == 2 && s.find(5) == 3); | |
std::cout << s.str() << std::endl; | |
s.reserve_value(20); // removes 302 | |
std::cout << s.str() << std::endl; | |
sparse_set cp = s; | |
s.remove(4); | |
assert(s.find(4) == -1); | |
s.clear(); | |
assert(s.size() == 0); | |
std::cout << s.str() << std::endl; | |
s.merge(cp); | |
std::cout << s.str() << std::endl; | |
s.merge(sparse_set{0, 3}); | |
assert((s == sparse_set{4, 2, 5, 0, 3})); | |
std::cout << s.str() << std::endl; | |
s.merge({1, 2, 3, 4, 5, 6, 19}); | |
assert((s == sparse_set{1, 4, 2, 5, 0, 3, 6, 19})); | |
auto moved = std::move(s); | |
assert(s.empty()); | |
std::cout << "s: " << s.str() << std::endl; | |
std::cout << "moved: " << moved.str() << std::endl; | |
s = moved; | |
assert(s == moved); | |
auto x = sparse_set{10, 20}; | |
auto y = sparse_set(5); | |
y.insert(0); | |
y.insert(1); | |
y.insert(2); | |
y.insert(3); | |
y.insert(10); | |
y.insert(20); | |
std::cout << s.str() << std::endl; | |
std::cout << y.str() << std::endl; | |
std::cout << s.intersection(y).str() << std::endl; | |
std::cout << y.intersection(s).str() << std::endl; | |
auto d = s.intersection({19}); | |
std::cout << "d: "; | |
std::cout << d.str() << std::endl; | |
d.insert(8); | |
assert(d.find(8) != -1); | |
d.insert(6); | |
std::cout << "pre-sort: "; | |
std::cout << d.str() << std::endl; | |
d.sort(); | |
std::cout << "post-sort: "; | |
std::cout << d.str() << std::endl; | |
std::cout << d[d.find(8)] << " is at index: " << d.find(8) << std::endl; | |
s.reserve_value(19); | |
y.reserve_value(9); | |
std::cout << s.difference(y).str() << std::endl; | |
std::cout << y.difference(s).str() << std::endl; | |
std::cout << "y is subset of s: " << y.is_subset_of(s) << std::endl; | |
std::cout << "s is subset of y: " << s.is_subset_of(y) << std::endl; | |
for (auto value : s) { | |
std::cout << value << ' '; | |
} | |
std::cout << std::endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment