Last active
August 30, 2020 14:29
-
-
Save xiaodaigh/994a994faffe93f5d82cad906749cb84 to your computer and use it in GitHub Desktop.
Fast implementation of `nuniuqe` in a SORTED vector
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
x = rand(1:1_000_000, 1_000_000_000) | |
using SortingLab | |
fsort!(x) | |
function unroll_loop(x) | |
count = 0 | |
@inbounds count += x[1] != x[2] | |
@inbounds count += x[2] != x[3] | |
@inbounds count += x[3] != x[4] | |
@inbounds count += x[4] != x[5] | |
@inbounds count += x[5] != x[6] | |
@inbounds count += x[6] != x[7] | |
@inbounds count += x[7] != x[8] | |
@inbounds count += x[8] != x[9] | |
l = length(x) | |
upto = 8div(l, 8) - 8 | |
@inbounds for i in 8:8:upto | |
count += x[i+1] != x[i+2] | |
count += x[i+2] != x[i+3] | |
count += x[i+3] != x[i+4] | |
count += x[i+4] != x[i+5] | |
count += x[i+5] != x[i+6] | |
count += x[i+6] != x[i+7] | |
count += x[i+7] != x[i+8] | |
count += x[i+8] != x[i+9] | |
end | |
for i in upto+1:l | |
count += x[i] != x[i-1] | |
end | |
count | |
end |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using LoopVectorization | |
function nunique(x) | |
count = 1 | |
@avx for i in 1:length(x)-1 | |
count += !isequal(x[i], x[i+1]) | |
end | |
count | |
end | |
@time nunique(x) | |
function lo_hi_partition(n, parts = Threads.nthreads()) | |
part_len = div(n, parts) | |
lo = collect(0:part_len:n-part_len) .+ 1 | |
hi = similar(lo) | |
hi[1:end-1] .= lo[2:end] .- 1 | |
hi[end] = n | |
lo, hi | |
end | |
lo_hi_partition(length(x)) | |
function pnunique(x) | |
lo, hi = lo_hi_partition(length(x)) | |
cnt = 1 | |
nt = Threads.nthreads() | |
Threads.@threads for i in 1:nt | |
l, h = lo[i], hi[i] | |
@inbounds cnt += nunique(@view x[l:h]) | |
end | |
for (l, h) in zip(@view(lo[2:end]), hi) | |
@inbounds cnt -= isequal(x[h], x[l]) | |
end | |
cnt | |
end | |
@benchmark pnunique(x) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment