Last active
November 8, 2020 00:13
-
-
Save ilknarf/755e13245eb64a3d026317b2d1c6304f to your computer and use it in GitHub Desktop.
Patience is a Virtue: Python implementations of sort and merge algorithms
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Rough implementations of patience sorts (p3 sort) as shown in | |
https://www.microsoft.com/en-us/research/publication/patience-is-a-virtue-revisiting-merge-and-sort-on-modern-processors/. | |
Note that performance benchmarks won't be valid, since the data structures used (e.g. deque) don't take advantage of memory locality | |
in the same way that an implementation of cache-local linked memory blocks would. | |
""" | |
from collections import deque | |
from binary_search import binary_search | |
def patience_sort(l, iter_t=list): | |
""" | |
patience_sort creates sorted runs by adding incrementing items | |
to a previous run, and creating new runs from smaller items | |
""" | |
runs = [] | |
i = 0 | |
# while elements still remain, keep adding to runs | |
while i < len(l): | |
n = l[i] | |
for run in runs: | |
if run[-1] <= n: | |
run.append(n) | |
break | |
else: | |
# new run | |
next_run = iter_t() | |
next_run.append(n) | |
runs.append(next_run) | |
i += 1 | |
return runs | |
def patience_star_sort(l): | |
""" | |
patience_star_sort performs a modified patience sort where | |
decrementing items can be appended to the front of a previous run, | |
as well as added to the end of the first run | |
""" | |
runs = [] | |
i = 0 | |
# while elements still remain, keep adding to runs | |
while i < len(l): | |
n = l[i] | |
for j in range(len(runs)): | |
run = runs[j] | |
if run[-1] <= n: | |
run.append(n) | |
if j == 0: | |
while i + 1 < len(l) and l[i + 1] >= n: | |
i += 1 | |
n = l[i] | |
run.append(n) | |
break | |
else: | |
index = binary_search(runs, n) | |
if index != len(runs): | |
# append to run | |
runs[index].appendleft(n) | |
else: | |
# new run | |
next_run = deque() | |
next_run.append(n) | |
runs.append(next_run) | |
i += 1 | |
return runs |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Rough Implementations of the merge methods highlighted in | |
https://www.microsoft.com/en-us/research/publication/patience-is-a-virtue-revisiting-merge-and-sort-on-modern-processors/. | |
In the paper, balanced and unbalanced ping-pong merges were shown to have significant performance increases | |
""" | |
import heapq | |
from collections import deque | |
def k_way_merge(runs): | |
""" | |
k_way_merge performs a k-way merge by placing lists | |
in a heap. | |
""" | |
res = [] | |
# add lists to heap | |
heapq.heapify(runs) | |
# while runs remain | |
while runs: | |
next_list = heapq.heappop(runs) | |
# append first item to result | |
res.append(next_list.popleft()) | |
# if not empty, push to heap | |
if next_list: | |
heapq.heappush(runs, next_list) | |
return res | |
def binary_merge(dest, src1, src2, dest_start, r1_start, r1_length, r2_start, r2_length): | |
""" | |
binary_merge performs a binary merge of two sorted lists into destß | |
""" | |
# copy src if dest is the same as src | |
if src1 is not dest: | |
i1, end1 = r1_start, r1_start + r1_length | |
else: | |
src1 = src1[r1_start:r1_start + r1_length] | |
i1, end1 = 0, r1_length | |
if src2 is not dest: | |
i2, end2 = r2_start, r2_start + r2_length | |
else: | |
src2 = src2[r2_start:r2_start + r2_length] | |
i2, end2 = 0, r2_length | |
combined_end = dest_start + r1_length + r2_length | |
dest_i = dest_start | |
while i1 < end1 and i2 < end2: | |
n1 = src1[i1] | |
n2 = src2[i2] | |
# determine next value | |
if n1 < n2: | |
n = n1 | |
i1 += 1 | |
else: | |
n = n2 | |
i2 += 1 | |
dest[dest_i] = n | |
dest_i += 1 | |
# merge remaining items, if exists | |
if i1 < end1: | |
dest[dest_i:combined_end] = src1[i1:end1] | |
if i2 < end2: | |
dest[dest_i:combined_end] = src2[i2:end2] | |
def ping_pong_balanced(runs): | |
""" | |
ping_pong_balanced performs a balanced ping_pong merge, as | |
laid out in the SIGMOD paper | |
""" | |
runs.sort(key=len) | |
# create two lists to 'ping pong' | |
l1 = [item for run in runs for item in run] | |
l2 = [0] * len(l1) | |
current_run = 1 | |
run_lengths = [len(l) for l in runs] | |
while len(run_lengths) > 1: | |
new_run_lengths = [] | |
start_index = 0 | |
for i in range(len(run_lengths) // 2): | |
# get run lengths | |
r1_length = run_lengths[i * 2] | |
r2_length = run_lengths[i * 2 + 1] | |
# get slice of next list | |
combined_length = r1_length + r2_length | |
merge_start = start_index | |
# get run start indices | |
r1_start = start_index | |
start_index += r1_length | |
r2_start = start_index | |
start_index += r2_length | |
# perform binary merge | |
binary_merge(l2, l1, l1, merge_start, r1_start, r1_length, r2_start, r2_length) | |
# add length to new_run_lengths | |
new_run_lengths.append(combined_length) | |
# if item remaining, add to end of merges | |
if len(run_lengths) % 2 != 0: | |
# copy remaining run | |
l2[start_index:] = l1[start_index:] | |
# add length to run_lengths | |
new_run_lengths.append(run_lengths[-1]) | |
# swap l1 and l2 | |
l1, l2 = l2, l1 | |
run_lengths = new_run_lengths | |
return l1 | |
def ping_pong_unbalanced(runs): | |
""" | |
ping_pong_unbalanced performs an unbalanced ping-pong merge, as laid out | |
in the SIGMOD paper | |
""" | |
# sort by length | |
runs.sort(key=len) | |
# flatten runs, then prepare second array | |
l1 = [item for run in runs for item in run] | |
l2 = [0] * len(l1) | |
# store tuple of (list_1?, run_length), where list_1? indicates if a list is l1 | |
elems = deque((True, len(run)) for run in runs) | |
i = 0 | |
# check if merging first pair cheaper than current pair | |
# increment start index across loops to determine run starts | |
start_index = 0 | |
while len(elems) > 1: | |
# if no next pair exists, reset iteration | |
if i + 1 >= len(elems): | |
i = 0 | |
start_index = 0 | |
# if first pair is cheaper to merge, reset iteration | |
elif elems[0][1] + elems[1][1] < elems[i][1] + elems[i + 1][1]: | |
i = 0 | |
start_index = 0 | |
# get run info from elems | |
r1_list, r1_length = elems[i] | |
r2_list, r2_length = elems[i + 1] | |
# set dest start index for merge | |
dest_start = start_index | |
# get start index of runs, and increment for next runs | |
r1_start = start_index | |
start_index += r1_length | |
r2_start = start_index | |
start_index += r2_length | |
# set sources and dest for merge | |
if r1_list: | |
dest = l2 | |
src1 = l1 | |
else: | |
dest = l1 | |
src1 = l2 | |
src2 = l1 if r2_list else l2 | |
# merge runs | |
binary_merge(dest, src1, src2, dest_start, r1_start, r1_length, r2_start, r2_length) | |
# update elems | |
elems[i] = not elems[i][0], r1_length + r2_length | |
del elems[i + 1] | |
# increment i | |
i += 1 | |
# return merged list (e.g, the remaining element) | |
if elems[0][0]: | |
return l1 | |
return l2 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def binary_search(l, v): | |
""" | |
binary search of lists, using the first element of the list for comparison | |
""" | |
lo, hi = 0, len(l) | |
while lo < hi: | |
mid = lo + (hi - lo) // 2 | |
if l[mid][0] >= v: | |
hi = mid | |
else: | |
lo = mid + 1 | |
return lo |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment