Skip to content

Instantly share code, notes, and snippets.

@hrj
Forked from akihiro4chawon/parasort.scala
Last active December 24, 2015 07:09

Revisions

  1. hrj revised this gist Sep 30, 2013. 2 changed files with 33 additions and 143 deletions.
    30 changes: 20 additions & 10 deletions parasort.scala
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,16 @@
    import java.util.Arrays

    object Util {
    import concurrent._
    import ExecutionContext.Implicits.global
    def par(f1 : => Unit, f2 : => Unit) {
    val fut1 = future { f1 }
    val fut2 = future { f2 }
    Await.ready(fut1.zip(fut2), duration.Duration.Inf)
    }
    }
    import Util._

    abstract class Sorter {
    def sorted(a: Array[Int]): Array[Int]
    }
    @@ -15,7 +26,6 @@ object DivideAndMergeParallelSorter extends Sorter {
    def sorted(a: Array[Int]) = {
    require(a.length >= 2)
    import scala.annotation.tailrec
    import scala.concurrent.ops._
    val len = a.length
    val half = len / 2
    par(Arrays.sort(a, 0, half), Arrays.sort(a, half, len))
    @@ -42,7 +52,6 @@ object DivideAndMergeParallelSorter2 extends Sorter {
    def sorted(a: Array[Int]) = {
    require(a.length >= scala.collection.parallel.availableProcessors)
    import scala.annotation.tailrec
    import scala.concurrent.ops._

    val nDiv = collection.parallel.availableProcessors
    val len = a.length
    @@ -543,16 +552,15 @@ object Main extends App {
    def getShuffled = scala.util.Random.shuffle(src).toArray
    }

    println(QuickSeqSorter.sorted(Array(3, 5, 1, 2, 4)) toList)
    val impls = Seq(
    SimpleSorter,
    DivideAndMergeParallelSorter,
    DivideAndMergeParallelSorter2,
    // ShellParallelSorter,
    // ShellParallelSorterOpt,
    // ShellParallelSorterOpt2,
    // ShellParallelSorterThread,
    // ShellParallelSorterThreadPool,
    ShellParallelSorter,
    ShellParallelSorterOpt,
    ShellParallelSorterOpt2,
    ShellParallelSorterThread,
    ShellParallelSorterThreadPool,
    QuickSeqSorter,
    QuickSeqQueueSorter,
    QuickParallelSorter,
    @@ -564,7 +572,7 @@ object Main extends App {
    System.gc; Thread.sleep(300);
    val times = for (i <- 1 to 10) yield benchmark(sorter, i)
    val noOutliers = times.sorted.drop(2).reverse.drop(2)
    println("avg. " + noOutliers.sum / noOutliers.size / 1000000 + "[ms]")
    println(sorter.getClass.getName+" avg. " + noOutliers.sum / noOutliers.size / 1000000 + "[ms]")
    }

    def benchmark(sorter: Sorter, i: Int) = {
    @@ -573,7 +581,7 @@ object Main extends App {
    val t1 = System.nanoTime
    sorter.sorted(r)
    val t = System.nanoTime - t1
    println(sorter.getClass.getName+" #"+i+": "+((t)/1000000)+"[ms]")
    // println(sorter.getClass.getName+" #"+i+": "+((t)/1000000)+"[ms]")
    t
    }

    @@ -583,3 +591,5 @@ object Main extends App {
    sorters foreach {s => assert(Arrays.equals(s.sorted(r.clone), a))}
    }
    }

    Main.main(args)
    146 changes: 13 additions & 133 deletions zBenchmarkResult.txt
    Original file line number Diff line number Diff line change
    @@ -1,133 +1,13 @@
    // @ Core i7 920
    SimpleSorter$ #1: 138[ms]
    SimpleSorter$ #2: 135[ms]
    SimpleSorter$ #3: 134[ms]
    SimpleSorter$ #4: 135[ms]
    SimpleSorter$ #5: 134[ms]
    SimpleSorter$ #6: 135[ms]
    SimpleSorter$ #7: 135[ms]
    SimpleSorter$ #8: 138[ms]
    SimpleSorter$ #9: 137[ms]
    SimpleSorter$ #10: 135[ms]
    avg. 135[ms]
    DivideAndMergeParallelSorter$ #1: 112[ms]
    DivideAndMergeParallelSorter$ #2: 75[ms]
    DivideAndMergeParallelSorter$ #3: 76[ms]
    DivideAndMergeParallelSorter$ #4: 100[ms]
    DivideAndMergeParallelSorter$ #5: 88[ms]
    DivideAndMergeParallelSorter$ #6: 74[ms]
    DivideAndMergeParallelSorter$ #7: 75[ms]
    DivideAndMergeParallelSorter$ #8: 76[ms]
    DivideAndMergeParallelSorter$ #9: 88[ms]
    DivideAndMergeParallelSorter$ #10: 74[ms]
    avg. 80[ms]
    DivideAndMergeParallelSorter2$ #1: 113[ms]
    DivideAndMergeParallelSorter2$ #2: 51[ms]
    DivideAndMergeParallelSorter2$ #3: 51[ms]
    DivideAndMergeParallelSorter2$ #4: 46[ms]
    DivideAndMergeParallelSorter2$ #5: 42[ms]
    DivideAndMergeParallelSorter2$ #6: 42[ms]
    DivideAndMergeParallelSorter2$ #7: 48[ms]
    DivideAndMergeParallelSorter2$ #8: 50[ms]
    DivideAndMergeParallelSorter2$ #9: 49[ms]
    DivideAndMergeParallelSorter2$ #10: 51[ms]
    avg. 49[ms]
    ShellParallelSorter$ #1: 212[ms]
    ShellParallelSorter$ #2: 160[ms]
    ShellParallelSorter$ #3: 166[ms]
    ShellParallelSorter$ #4: 188[ms]
    ShellParallelSorter$ #5: 157[ms]
    ShellParallelSorter$ #6: 165[ms]
    ShellParallelSorter$ #7: 186[ms]
    ShellParallelSorter$ #8: 158[ms]
    ShellParallelSorter$ #9: 162[ms]
    ShellParallelSorter$ #10: 183[ms]
    avg. 170[ms]
    ShellParallelSorterOpt$ #1: 124[ms]
    ShellParallelSorterOpt$ #2: 110[ms]
    ShellParallelSorterOpt$ #3: 145[ms]
    ShellParallelSorterOpt$ #4: 90[ms]
    ShellParallelSorterOpt$ #5: 98[ms]
    ShellParallelSorterOpt$ #6: 85[ms]
    ShellParallelSorterOpt$ #7: 84[ms]
    ShellParallelSorterOpt$ #8: 107[ms]
    ShellParallelSorterOpt$ #9: 114[ms]
    ShellParallelSorterOpt$ #10: 144[ms]
    avg. 107[ms]
    ShellParallelSorterOpt2$ #1: 117[ms]
    ShellParallelSorterOpt2$ #2: 80[ms]
    ShellParallelSorterOpt2$ #3: 79[ms]
    ShellParallelSorterOpt2$ #4: 79[ms]
    ShellParallelSorterOpt2$ #5: 79[ms]
    ShellParallelSorterOpt2$ #6: 80[ms]
    ShellParallelSorterOpt2$ #7: 78[ms]
    ShellParallelSorterOpt2$ #8: 81[ms]
    ShellParallelSorterOpt2$ #9: 77[ms]
    ShellParallelSorterOpt2$ #10: 78[ms]
    avg. 79[ms]
    ShellParallelSorterThread$ #1: 129[ms]
    ShellParallelSorterThread$ #2: 125[ms]
    ShellParallelSorterThread$ #3: 119[ms]
    ShellParallelSorterThread$ #4: 116[ms]
    ShellParallelSorterThread$ #5: 120[ms]
    ShellParallelSorterThread$ #6: 123[ms]
    ShellParallelSorterThread$ #7: 121[ms]
    ShellParallelSorterThread$ #8: 122[ms]
    ShellParallelSorterThread$ #9: 119[ms]
    ShellParallelSorterThread$ #10: 122[ms]
    avg. 121[ms]
    ShellParallelSorterThreadPool$ #1: 448[ms]
    ShellParallelSorterThreadPool$ #2: 397[ms]
    ShellParallelSorterThreadPool$ #3: 389[ms]
    ShellParallelSorterThreadPool$ #4: 487[ms]
    ShellParallelSorterThreadPool$ #5: 473[ms]
    ShellParallelSorterThreadPool$ #6: 389[ms]
    ShellParallelSorterThreadPool$ #7: 396[ms]
    ShellParallelSorterThreadPool$ #8: 398[ms]
    ShellParallelSorterThreadPool$ #9: 404[ms]
    ShellParallelSorterThreadPool$ #10: 397[ms]
    avg. 407[ms]
    QuickSeqSorter$ #1: 124[ms]
    QuickSeqSorter$ #2: 122[ms]
    QuickSeqSorter$ #3: 121[ms]
    QuickSeqSorter$ #4: 121[ms]
    QuickSeqSorter$ #5: 122[ms]
    QuickSeqSorter$ #6: 121[ms]
    QuickSeqSorter$ #7: 121[ms]
    QuickSeqSorter$ #8: 122[ms]
    QuickSeqSorter$ #9: 121[ms]
    QuickSeqSorter$ #10: 124[ms]
    avg. 122[ms]
    QuickSeqQueueSorter$ #1: 131[ms]
    QuickSeqQueueSorter$ #2: 126[ms]
    QuickSeqQueueSorter$ #3: 125[ms]
    QuickSeqQueueSorter$ #4: 124[ms]
    QuickSeqQueueSorter$ #5: 124[ms]
    QuickSeqQueueSorter$ #6: 123[ms]
    QuickSeqQueueSorter$ #7: 126[ms]
    QuickSeqQueueSorter$ #8: 123[ms]
    QuickSeqQueueSorter$ #9: 124[ms]
    QuickSeqQueueSorter$ #10: 124[ms]
    avg. 124[ms]
    QuickParallelSorter$ #1: 34[ms]
    QuickParallelSorter$ #2: 32[ms]
    QuickParallelSorter$ #3: 33[ms]
    QuickParallelSorter$ #4: 31[ms]
    QuickParallelSorter$ #5: 28[ms]
    QuickParallelSorter$ #6: 31[ms]
    QuickParallelSorter$ #7: 29[ms]
    QuickParallelSorter$ #8: 29[ms]
    QuickParallelSorter$ #9: 31[ms]
    QuickParallelSorter$ #10: 29[ms]
    avg. 31[ms]
    ParallelSorter #1: 36[ms]
    ParallelSorter #2: 140[ms]
    ParallelSorter #3: 29[ms]
    ParallelSorter #4: 32[ms]
    ParallelSorter #5: 30[ms]
    ParallelSorter #6: 142[ms]
    ParallelSorter #7: 29[ms]
    ParallelSorter #8: 28[ms]
    ParallelSorter #9: 29[ms]
    ParallelSorter #10: 141[ms]
    avg. 50[ms]
    # i7-3610QM CPU @ 2.30GHz
    Main$$anon$1$SimpleSorter$ avg. 83[ms]
    Main$$anon$1$DivideAndMergeParallelSorter$ avg. 58[ms]
    Main$$anon$1$DivideAndMergeParallelSorter2$ avg. 31[ms]
    Main$$anon$1$ShellParallelSorter$ avg. 92[ms]
    Main$$anon$1$ShellParallelSorterOpt$ avg. 82[ms]
    Main$$anon$1$ShellParallelSorterOpt2$ avg. 74[ms]
    Main$$anon$1$ShellParallelSorterThread$ avg. 114[ms]
    Main$$anon$1$ShellParallelSorterThreadPool$ avg. 298[ms]
    Main$$anon$1$QuickSeqSorter$ avg. 92[ms]
    Main$$anon$1$QuickSeqQueueSorter$ avg. 91[ms]
    Main$$anon$1$QuickParallelSorter$ avg. 32[ms]
    Main$$anon$1$ParallelSorter avg. 29[ms]
  2. @akihiro4chawon akihiro4chawon revised this gist May 27, 2011. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion parasort.scala
    Original file line number Diff line number Diff line change
    @@ -499,7 +499,7 @@ class ParallelSorter extends Sorter with Runnable {
    }

    if (r - p < 19*10*3) {
    java.util.Arrays.sort(array, p, r)
    java.util.Arrays.sort(array, p, r + 1)
    if (nRemains.addAndGet(p - r - 1) == 0) {
    isDone = true
    semaphore.release(nThreads)
  3. @akihiro4chawon akihiro4chawon revised this gist May 26, 2011. 2 changed files with 680 additions and 275 deletions.
    760 changes: 555 additions & 205 deletions parasort.scala
    Original file line number Diff line number Diff line change
    @@ -1,235 +1,585 @@
    import java.util.Arrays

    abstract class Sorter {
    def sorted(a: Array[Int]): Array[Int]
    def sorted(a: Array[Int]): Array[Int]
    }

    object SimpleSorter extends Sorter {
    def sorted(a: Array[Int]) = {
    Arrays.sort(a)
    a
    }
    def sorted(a: Array[Int]) = {
    Arrays.sort(a)
    a
    }
    }

    object DivideAndMergeParallelSorter extends Sorter {
    def sorted(a: Array[Int]) = {
    require(a.length >= 2)
    import scala.annotation.tailrec
    import scala.concurrent.ops._
    val len = a.length
    val half = len / 2
    par(Arrays.sort(a, 0, half), Arrays.sort(a, half, len))
    val ret = new Array[Int](a.length)
    @tailrec
    def merge(i: Int, j: Int, k: Int) {
    if (a(j) <= a(k)) {
    ret(i) = a(j)
    if (j < half - 1) merge(i + 1, j + 1, k)
    else System.arraycopy(a, k, ret, i + 1, len - k)
    } else {
    ret(i) = a(k)
    if (k < len - 1) merge(i + 1, j, k + 1)
    else System.arraycopy(a, j, ret, i + 1, half - j)
    }
    }
    merge(0, 0, half)
    ret
    }
    def sorted(a: Array[Int]) = {
    require(a.length >= 2)
    import scala.annotation.tailrec
    import scala.concurrent.ops._
    val len = a.length
    val half = len / 2
    par(Arrays.sort(a, 0, half), Arrays.sort(a, half, len))

    val ret = new Array[Int](a.length)
    @tailrec
    def merge(i: Int, j: Int, k: Int) {
    if (a(j) <= a(k)) {
    ret(i) = a(j)
    if (j < half - 1) merge(i + 1, j + 1, k)
    else System.arraycopy(a, k, ret, i + 1, len - k)
    } else {
    ret(i) = a(k)
    if (k < len - 1) merge(i + 1, j, k + 1)
    else System.arraycopy(a, j, ret, i + 1, half - j)
    }
    }
    merge(0, 0, half)
    ret
    }
    }

    object DivideAndMergeParallelSorter2 extends Sorter {
    def sorted(a: Array[Int]) = {
    require(a.length >= scala.collection.parallel.availableProcessors)
    import scala.annotation.tailrec
    import scala.concurrent.ops._
    val nDiv = collection.parallel.availableProcessors
    val len = a.length
    val pslices = (0 until nDiv).par map {i => Arrays.copyOfRange(a, i * len / nDiv, (i + 1) * len / nDiv)}
    pslices foreach (Arrays.sort _)
    def merge(a: Array[Int], b: Array[Int]): Array[Int] = {
    val alen = a.length
    val blen = b.length
    val ret = new Array[Int](alen + blen);
    @tailrec def rec(i: Int, j: Int, k: Int) {
    if (a(j) <= b(k)) {
    ret(i) = a(j)
    if (j < alen - 1) rec(i + 1, j + 1, k)
    else System.arraycopy(b, k, ret, i + 1, blen - k)
    } else {
    ret(i) = b(k)
    if (k < blen - 1) rec(i + 1, j, k + 1)
    else System.arraycopy(a, j, ret, i + 1, alen - j)
    }
    }
    rec(0, 0, 0)
    ret
    }
    pslices reduce merge
    }
    def sorted(a: Array[Int]) = {
    require(a.length >= scala.collection.parallel.availableProcessors)
    import scala.annotation.tailrec
    import scala.concurrent.ops._

    val nDiv = collection.parallel.availableProcessors
    val len = a.length
    val pslices = (0 until nDiv).par map {i => Arrays.copyOfRange(a, i * len / nDiv, (i + 1) * len / nDiv)}
    pslices foreach (Arrays.sort _)

    def merge(a: Array[Int], b: Array[Int]): Array[Int] = {
    val alen = a.length
    val blen = b.length
    val ret = new Array[Int](alen + blen);
    @tailrec def rec(i: Int, j: Int, k: Int) {
    if (a(j) <= b(k)) {
    ret(i) = a(j)
    if (j < alen - 1) rec(i + 1, j + 1, k)
    else System.arraycopy(b, k, ret, i + 1, blen - k)
    } else {
    ret(i) = b(k)
    if (k < blen - 1) rec(i + 1, j, k + 1)
    else System.arraycopy(a, j, ret, i + 1, alen - j)
    }
    }
    rec(0, 0, 0)
    ret
    }
    pslices reduce merge
    }
    }

    object ShellParallelSorter extends Sorter {
    def sorted(a: Array[Int]) = {
    val hInit = (Iterator.iterate(1)(_ * 3 + 1) find (a.length <) get) / 3
    for (h <- Iterator.iterate(hInit)(_ / 3) takeWhile (1 <=)) {
    for (k <- 0 until h par) {
    var i = k - h; while ({i += h; i < a.size}) {
    def sorted(a: Array[Int]) = {
    val hInit = (Iterator.iterate(1)(_ * 3 + 1) find (a.length <) get) / 3
    for (h <- Iterator.iterate(hInit)(_ / 3) takeWhile (1 <=)) {
    for (k <- 0 until h par) {
    var i = k - h; while ({i += h; i < a.size}) {
    // for (i <- k until a.size by h) {
    val v = a(i)
    var j = i
    while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - h)
    j -= h
    }
    a(j) = v
    }
    }
    }
    a
    }
    val v = a(i)
    var j = i
    while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - h)
    j -= h
    }
    a(j) = v
    }
    }
    }
    a
    }
    }

    object ShellParallelSorterOpt extends Sorter {
    def sorted(a: Array[Int]) = {
    val len = a.length
    var h = 1; while (h < len) {h *= 3; h += 1}
    while ({h /= 3; h >= 1}) {
    for (k <- 0 until h par) {
    var i = k - h; while ({i += h; i < len}) {
    val v = a(i)
    var j = i
    while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - h)
    j -= h
    }
    a(j) = v
    }
    }
    }
    a
    }
    def sorted(a: Array[Int]) = {
    val len = a.length
    var h = 1; while (h < len) {h *= 3; h += 1}
    while ({h /= 3; h >= 1}) {
    for (k <- 0 until h par) {
    var i = k - h; while ({i += h; i < len}) {
    val v = a(i)
    var j = i
    while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - h)
    j -= h
    }
    a(j) = v
    }
    }
    }
    a
    }
    }

    object ShellParallelSorterOpt2 extends Sorter {
    def sorted(a: Array[Int]) = {
    val len = a.length
    var h = 1; while (h < len) {h *= 3; h += 1}
    h/=3;h/=3
    while ({h /= 3; h > 1}) {
    for (k <- 0 until h par) {
    var i = k - h; while ({i += h; i < len}) {
    val v = a(i)
    var j = i
    while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - h)
    j -= h
    }
    a(j) = v
    }
    }
    }
    var i = 0; while ({i += 1; i < len}) {
    val v = a(i)
    var j = i; while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - 1)
    j -= 1
    }
    a(j) = v
    }
    a
    }
    def sorted(a: Array[Int]) = {
    val len = a.length
    var h = 1; while (h < len) {h *= 3; h += 1}
    h/=3;h/=3
    while ({h /= 3; h > 1}) {
    for (k <- 0 until h par) {
    var i = k - h; while ({i += h; i < len}) {
    val v = a(i)
    var j = i
    while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - h)
    j -= h
    }
    a(j) = v
    }
    }
    }
    var i = 0; while ({i += 1; i < len}) {
    val v = a(i)
    var j = i; while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - 1)
    j -= 1
    }
    a(j) = v
    }
    a
    }
    }

    object ShellParallelSorterThread extends Sorter with Runnable {
    val count = new java.util.concurrent.atomic.AtomicInteger
    var h: Int = 1
    var a: Array[Int] = null

    def run() {
    var i: Int = 0
    val len = this.a.length
    val a = this.a
    val h = this.h
    while ({i = count.getAndDecrement; i >= 0}) {
    i -= h; while ({i += h; i < len}) {
    val v = a(i)
    var j = i
    while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - h)
    j -= h
    }
    a(j) = v
    }
    }
    }
    def sorted(a: Array[Int]) = {
    this.a = a
    val len = a.length
    while (h < len) {h *= 3; h += 1}

    h /= 3;
    while ({h /= 3; h > 1}) {
    count.set(h - 1)
    val nThreads = (Runtime.getRuntime.availableProcessors - 1) min (h - 1)
    val pool = Array.fill(nThreads)(new Thread(this))
    pool foreach {_.start}
    run
    pool foreach {_.join}
    }

    var i = 0; while ({i += 1; i < len}) {
    val v = a(i)
    var j = i; while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - 1)
    j -= 1
    }
    a(j) = v
    }
    a
    }
    val count = new java.util.concurrent.atomic.AtomicInteger
    var h: Int = 1
    var a: Array[Int] = null

    def run() {
    var i: Int = 0
    val len = this.a.length
    val a = this.a
    val h = this.h
    while ({i = count.getAndDecrement; i >= 0}) {
    i -= h; while ({i += h; i < len}) {
    val v = a(i)
    var j = i
    while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - h)
    j -= h
    }
    a(j) = v
    }
    }
    }
    def sorted(a: Array[Int]) = {
    this.a = a
    val len = a.length
    while (h < len) {h *= 3; h += 1}

    h /= 3;
    while ({h /= 3; h > 1}) {
    count.set(h - 1)
    val nThreads = (Runtime.getRuntime.availableProcessors - 1) min (h - 1)
    val pool = Array.fill(nThreads)(new Thread(this))
    pool foreach {_.start}
    run
    pool foreach {_.join}
    }

    var i = 0; while ({i += 1; i < len}) {
    val v = a(i)
    var j = i; while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - 1)
    j -= 1
    }
    a(j) = v
    }
    a
    }
    }

    object ShellParallelSorterThreadPool extends Sorter with Runnable {
    import java.util.concurrent._
    val count = new java.util.concurrent.atomic.AtomicInteger
    var latch: CountDownLatch = null
    var h: Int = 1
    var a: Array[Int] = null

    def run() {
    var i: Int = 0
    val len = this.a.length
    val a = this.a
    val h = this.h
    i = count.getAndDecrement
    i -= h;
    while ({i += h; i < len}) {
    val v = a(i)
    var j = i
    while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - h)
    j -= h
    }
    a(j) = v
    }
    latch.countDown
    }
    def sorted(a: Array[Int]) = {
    this.a = a
    val len = a.length
    while (h < len) {h *= 3; h += 1}
    h /= 3;

    val pool = Executors.newFixedThreadPool(Runtime.getRuntime.availableProcessors)
    while ({h /= 3; h > 1}) {
    count.set(h - 1)
    latch = new CountDownLatch(h)
    // 0 until h foreach {i => pool.execute(this)}
    var i = 0; while (i < h) {pool.execute(this); i += 1}
    latch.await
    }

    var i = 0; while ({i += 1; i < len}) {
    val v = a(i)
    var j = i; while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - 1)
    j -= 1
    }
    a(j) = v
    }
    a
    }
    }

    object QuickSeqSorter extends Sorter {
    import scala.annotation.tailrec

    def sorted(a: Array[Int]) = {
    def isort(b: Int, e: Int) = {
    var i = b; while ({i += 1; i <= e}) {
    var j = i; while (j > b && a(j) < a(j - 1)) {
    val t = a(j); a(j) = a(j - 1); a(j - 1) = t
    j -= 1
    }
    }
    }

    def partition(p: Int, r: Int) = {
    {
    val i = p + scala.util.Random.nextInt(r - p)
    val t = a(p); a(p) = a(i); a(i) = t
    }

    val x = a(p)
    var k = p

    var l = r + 1
    while ({k += 1; (a(k) <= x && k < r)}) {};
    while ({l -= 1; a(l) > x}) {};
    while (k < l) {
    val t = a(k); a(k) = a(l); a(l) = t
    while ({k += 1; a(k) <= x}) {}
    while ({l -= 1; a(l) > x}) {}
    }
    val t = a(p); a(p) = a(l); a(l) = t
    l
    }

    def rec(p: Int, r: Int) {
    if (r - p < 7)
    isort(p, r)
    else {
    val q = partition(p, r)
    rec(p, q - 1)
    rec(q + 1, r)
    }
    }
    rec(0, a.length - 1)
    a
    }
    }

    class LightQueue(cap: Int) {
    private val q = new Array[Int](cap)
    private var beg, end = 0
    final def add(a: Int, b: Int) {
    q(beg) = a; beg += 1; //beg &= (cap - 1)
    q(beg) = b; beg += 1; beg &= (cap - 1)
    }
    final def pop() = {
    val ret = q(end)
    end += 1; end &= (cap - 1)
    ret
    }
    final def isEmpty = beg == end
    }

    class LightStack(cap: Int) {
    private val q = new Array[Int](cap)
    private var end = 0
    final def add(a: Int, b: Int) {
    q(end) = b; end += 1
    q(end) = a; end += 1
    }
    final def pop() = {
    end -= 1
    q(end)
    }
    final def isEmpty = end == 0
    }

    object QuickSeqQueueSorter extends Sorter {

    def sorted(a: Array[Int]) = {
    def isort(b: Int, e: Int) = {
    var i = b; while ({i += 1; i <= e}) {
    var j = i; while (j > b && a(j) < a(j - 1)) {
    val t = a(j); a(j) = a(j - 1); a(j - 1) = t
    j -= 1
    }
    }
    }
    def partition(p: Int, r: Int) = {
    val i = p + scala.util.Random.nextInt(r - p)
    val x = a(i); a(i) = a(p); a(p) = x
    var k = p
    var l = r + 1
    while ({k += 1; (a(k) <= x && k < r)}) {};
    while ({l -= 1; a(l) > x}) {};
    while (k < l) {
    val t = a(k); a(k) = a(l); a(l) = t
    while ({k += 1; a(k) <= x}) {}
    while ({l -= 1; a(l) > x}) {}
    }
    val t = a(p); a(p) = a(l); a(l) = t
    l
    }

    val queue = new LightStack(a.length)
    queue.add(0, a.length - 1)
    while (!queue.isEmpty) {
    var p, r: Int = 0
    synchronized {
    p = queue.pop
    r = queue.pop
    }
    if (r - p < 19) {
    isort(p, r)
    } else {
    val q = partition(p, r)
    synchronized {
    queue.add(p, q - 1)
    queue.add(q + 1, r)
    }
    }
    }
    a
    }
    }

    object QuickParallelSorter extends Sorter with Runnable {
    import java.util.concurrent.Semaphore
    import java.util.concurrent.atomic.AtomicInteger
    var queue: LightStack = _
    var array: Array[Int] = _
    var semaphore: Semaphore = _
    var isDone: Boolean = _
    var nRemains: AtomicInteger = _
    var nThreads: Int = _

    def run {
    val a = array
    val queue = this.queue

    def isort(b: Int, e: Int) = {
    var i = b; while ({i += 1; i <= e}) {
    var j = i; while (j > b && a(j) < a(j - 1)) {
    val t = a(j); a(j) = a(j - 1); a(j - 1) = t
    j -= 1
    }
    }
    }
    def qsort(p: Int, r: Int) {
    if (r - p < 13)
    isort(p, r)
    else {
    val q = partition(p, r)
    qsort(p, q - 1)
    qsort(q + 1, r)
    }
    }
    def partition(p: Int, r: Int) = {
    val i = p + scala.util.Random.nextInt(r - p)
    val x = a(i); a(i) = a(p); a(p) = x
    var k = p
    var l = r + 1
    while ({k += 1; (a(k) <= x && k < r)}) {};
    while ({l -= 1; a(l) > x}) {};
    while (k < l) {
    val t = a(k); a(k) = a(l); a(l) = t
    while ({k += 1; a(k) <= x}) {}
    while ({l -= 1; a(l) > x}) {}
    }
    val t = a(p); a(p) = a(l); a(l) = t
    l
    }

    while (true) {
    semaphore.acquire
    if (isDone) return
    var p, r: Int = 0
    synchronized {
    p = queue.pop
    r = queue.pop
    }
    if (r - p < 19*10*3) {
    qsort(p, r)
    if (nRemains.addAndGet(p - r - 1) == 0) {
    isDone = true
    semaphore.release(nThreads)
    }
    } else {
    val q = partition(p, r)
    var nsem = 0
    synchronized {
    if (p != q) {queue.add(p, q - 1); nsem += 1}
    if (q != r) {queue.add(q + 1, r); nsem += 1}
    }
    nRemains.decrementAndGet
    semaphore.release(nsem)
    }
    }
    }
    def sorted(a: Array[Int]) = {
    queue = new LightStack(a.length)
    queue.add(0, a.length - 1)
    array = a
    isDone = false
    nThreads = Runtime.getRuntime.availableProcessors

    semaphore = new Semaphore(1)
    nRemains = new AtomicInteger(a.length)

    val threads = Array.fill(nThreads)(new Thread(this))
    threads foreach {_.start}
    threads foreach {_.join}
    a
    }
    }

    class ParallelSorter extends Sorter with Runnable {
    import java.util.concurrent.Semaphore
    import java.util.concurrent.atomic.AtomicInteger

    var queue: LightStack = null
    var array: Array[Int] = null
    var semaphore: Semaphore = null
    var isDone = false
    var nRemains: AtomicInteger = null
    var nThreads: Int = _

    def run {
    def partition(p: Int, r: Int) = {
    val i = p + scala.util.Random.nextInt(r - p)
    val x = array(i); array(i) = array(p); array(p) = x
    var k = p
    var l = r + 1
    while ({k += 1; (array(k) <= x && k < r)}) {};
    while ({l -= 1; array(l) > x}) {};
    while (k < l) {
    val t = array(k); array(k) = array(l); array(l) = t
    while ({k += 1; array(k) <= x}) {}
    while ({l -= 1; array(l) > x}) {}
    }
    val t = array(p); array(p) = array(l); array(l) = t
    l
    }

    while (true) {
    semaphore.acquire
    if (isDone) return

    var p, r: Int = 0
    synchronized {
    p = queue.pop
    r = queue.pop
    }

    if (r - p < 19*10*3) {
    java.util.Arrays.sort(array, p, r)
    if (nRemains.addAndGet(p - r - 1) == 0) {
    isDone = true
    semaphore.release(nThreads)
    }
    } else {
    val q = partition(p, r)
    var nsem = 0
    synchronized {
    if (p != q) {queue.add(p, q - 1); nsem += 1}
    if (q != r) {queue.add(q + 1, r); nsem += 1}
    }
    nRemains.decrementAndGet
    semaphore.release(nsem)
    }
    }
    }

    def sorted(a: Array[Int]) = {
    this.queue = new LightStack(a.length)
    this.queue.add(0, a.length - 1)
    this.array = a
    this.isDone = false
    this.semaphore = new Semaphore(1)
    this.nRemains = new AtomicInteger(a.length)
    this.nThreads = Runtime.getRuntime.availableProcessors

    val threads = Array.fill(nThreads)(new Thread(this))

    threads foreach {_.start}
    threads foreach {_.join}
    a
    }
    }

    object Main extends App {
    import scala.collection.mutable.WrappedArray
    import scala.util.Random

    object RandomSource {
    private val src: WrappedArray[Int] = Array.range(0, 1000000)
    def getShuffled = scala.util.Random.shuffle(src).toArray
    }

    val impls = Seq(
    SimpleSorter,
    DivideAndMergeParallelSorter,
    DivideAndMergeParallelSorter2,
    ShellParallelSorter,
    ShellParallelSorterOpt,
    ShellParallelSorterOpt2,
    ShellParallelSorterThread)

    import scala.collection.mutable.WrappedArray
    import scala.util.Random

    object RandomSource {
    private val src: WrappedArray[Int] = Array.range(0, 1000000)
    def getShuffled = scala.util.Random.shuffle(src).toArray
    }

    println(QuickSeqSorter.sorted(Array(3, 5, 1, 2, 4)) toList)
    val impls = Seq(
    SimpleSorter,
    DivideAndMergeParallelSorter,
    DivideAndMergeParallelSorter2,
    // ShellParallelSorter,
    // ShellParallelSorterOpt,
    // ShellParallelSorterOpt2,
    // ShellParallelSorterThread,
    // ShellParallelSorterThreadPool,
    QuickSeqSorter,
    QuickSeqQueueSorter,
    QuickParallelSorter,
    new ParallelSorter
    )

    check(impls.head, impls.tail :_*)
    impls foreach { sorter =>
    System.gc
    val times = for (i <- 1 to 10) yield benchmark(sorter, i)
    val noOutliers = times.sorted.drop(2).reverse.drop(2)
    println("avg. " + noOutliers.sum / noOutliers.size / 1000000 + "[ms]")
    }

    def benchmark(sorter: Sorter, i: Int) = {
    val r = RandomSource.getShuffled
    val t1 = System.nanoTime
    sorter.sorted(r)
    val t = System.nanoTime - t1
    println(sorter.getClass.getName+" #"+i+": "+((t)/1000000)+"[ms]")
    t
    }

    def check(refSorter: Sorter, sorters: Sorter*) {
    val r = RandomSource.getShuffled
    val a = refSorter.sorted(r.clone)
    sorters foreach {s => assert(Arrays.equals(s.sorted(r.clone), a))}
    }
    impls foreach { sorter =>
    System.gc; Thread.sleep(300);
    val times = for (i <- 1 to 10) yield benchmark(sorter, i)
    val noOutliers = times.sorted.drop(2).reverse.drop(2)
    println("avg. " + noOutliers.sum / noOutliers.size / 1000000 + "[ms]")
    }

    def benchmark(sorter: Sorter, i: Int) = {
    System.gc; Thread.sleep(300)
    val r = RandomSource.getShuffled
    val t1 = System.nanoTime
    sorter.sorted(r)
    val t = System.nanoTime - t1
    println(sorter.getClass.getName+" #"+i+": "+((t)/1000000)+"[ms]")
    t
    }

    def check(refSorter: Sorter, sorters: Sorter*) {
    val r = RandomSource.getShuffled
    val a = refSorter.sorted(r.clone)
    sorters foreach {s => assert(Arrays.equals(s.sorted(r.clone), a))}
    }
    }
    195 changes: 125 additions & 70 deletions zBenchmarkResult.txt
    Original file line number Diff line number Diff line change
    @@ -1,78 +1,133 @@
    // @ Core i7 920
    SimpleSorter$ #1: 154[ms]
    SimpleSorter$ #2: 136[ms]
    SimpleSorter$ #3: 146[ms]
    SimpleSorter$ #4: 138[ms]
    SimpleSorter$ #5: 138[ms]
    SimpleSorter$ #6: 138[ms]
    SimpleSorter$ #7: 140[ms]
    SimpleSorter$ #8: 137[ms]
    SimpleSorter$ #9: 138[ms]
    SimpleSorter$ #10: 138[ms]
    avg. 138[ms]
    DivideAndMergeParallelSorter$ #1: 108[ms]
    SimpleSorter$ #1: 138[ms]
    SimpleSorter$ #2: 135[ms]
    SimpleSorter$ #3: 134[ms]
    SimpleSorter$ #4: 135[ms]
    SimpleSorter$ #5: 134[ms]
    SimpleSorter$ #6: 135[ms]
    SimpleSorter$ #7: 135[ms]
    SimpleSorter$ #8: 138[ms]
    SimpleSorter$ #9: 137[ms]
    SimpleSorter$ #10: 135[ms]
    avg. 135[ms]
    DivideAndMergeParallelSorter$ #1: 112[ms]
    DivideAndMergeParallelSorter$ #2: 75[ms]
    DivideAndMergeParallelSorter$ #3: 84[ms]
    DivideAndMergeParallelSorter$ #4: 76[ms]
    DivideAndMergeParallelSorter$ #5: 150[ms]
    DivideAndMergeParallelSorter$ #6: 80[ms]
    DivideAndMergeParallelSorter$ #7: 76[ms]
    DivideAndMergeParallelSorter$ #3: 76[ms]
    DivideAndMergeParallelSorter$ #4: 100[ms]
    DivideAndMergeParallelSorter$ #5: 88[ms]
    DivideAndMergeParallelSorter$ #6: 74[ms]
    DivideAndMergeParallelSorter$ #7: 75[ms]
    DivideAndMergeParallelSorter$ #8: 76[ms]
    DivideAndMergeParallelSorter$ #9: 75[ms]
    DivideAndMergeParallelSorter$ #10: 167[ms]
    avg. 83[ms]
    DivideAndMergeParallelSorter2$ #1: 57[ms]
    DivideAndMergeParallelSorter2$ #2: 44[ms]
    DivideAndMergeParallelSorter2$ #3: 86[ms]
    DivideAndMergeParallelSorter2$ #4: 51[ms]
    DivideAndMergeParallelSorter2$ #5: 79[ms]
    DivideAndMergeParallelSorter$ #9: 88[ms]
    DivideAndMergeParallelSorter$ #10: 74[ms]
    avg. 80[ms]
    DivideAndMergeParallelSorter2$ #1: 113[ms]
    DivideAndMergeParallelSorter2$ #2: 51[ms]
    DivideAndMergeParallelSorter2$ #3: 51[ms]
    DivideAndMergeParallelSorter2$ #4: 46[ms]
    DivideAndMergeParallelSorter2$ #5: 42[ms]
    DivideAndMergeParallelSorter2$ #6: 42[ms]
    DivideAndMergeParallelSorter2$ #7: 61[ms]
    DivideAndMergeParallelSorter2$ #8: 40[ms]
    DivideAndMergeParallelSorter2$ #9: 50[ms]
    DivideAndMergeParallelSorter2$ #10: 87[ms]
    avg. 57[ms]
    ShellParallelSorter$ #1: 171[ms]
    ShellParallelSorter$ #2: 151[ms]
    ShellParallelSorter$ #3: 202[ms]
    ShellParallelSorter$ #4: 156[ms]
    ShellParallelSorter$ #5: 151[ms]
    ShellParallelSorter$ #6: 202[ms]
    ShellParallelSorter$ #7: 152[ms]
    ShellParallelSorter$ #8: 145[ms]
    ShellParallelSorter$ #9: 204[ms]
    ShellParallelSorter$ #10: 151[ms]
    avg. 164[ms]
    ShellParallelSorterOpt$ #1: 92[ms]
    ShellParallelSorterOpt$ #2: 85[ms]
    ShellParallelSorterOpt$ #3: 117[ms]
    ShellParallelSorterOpt$ #4: 87[ms]
    ShellParallelSorterOpt$ #5: 173[ms]
    ShellParallelSorterOpt$ #6: 90[ms]
    ShellParallelSorterOpt$ #7: 85[ms]
    ShellParallelSorterOpt$ #8: 117[ms]
    ShellParallelSorterOpt$ #9: 85[ms]
    ShellParallelSorterOpt$ #10: 175[ms]
    avg. 98[ms]
    ShellParallelSorterOpt2$ #1: 79[ms]
    ShellParallelSorterOpt2$ #2: 83[ms]
    ShellParallelSorterOpt2$ #3: 81[ms]
    ShellParallelSorterOpt2$ #4: 82[ms]
    DivideAndMergeParallelSorter2$ #7: 48[ms]
    DivideAndMergeParallelSorter2$ #8: 50[ms]
    DivideAndMergeParallelSorter2$ #9: 49[ms]
    DivideAndMergeParallelSorter2$ #10: 51[ms]
    avg. 49[ms]
    ShellParallelSorter$ #1: 212[ms]
    ShellParallelSorter$ #2: 160[ms]
    ShellParallelSorter$ #3: 166[ms]
    ShellParallelSorter$ #4: 188[ms]
    ShellParallelSorter$ #5: 157[ms]
    ShellParallelSorter$ #6: 165[ms]
    ShellParallelSorter$ #7: 186[ms]
    ShellParallelSorter$ #8: 158[ms]
    ShellParallelSorter$ #9: 162[ms]
    ShellParallelSorter$ #10: 183[ms]
    avg. 170[ms]
    ShellParallelSorterOpt$ #1: 124[ms]
    ShellParallelSorterOpt$ #2: 110[ms]
    ShellParallelSorterOpt$ #3: 145[ms]
    ShellParallelSorterOpt$ #4: 90[ms]
    ShellParallelSorterOpt$ #5: 98[ms]
    ShellParallelSorterOpt$ #6: 85[ms]
    ShellParallelSorterOpt$ #7: 84[ms]
    ShellParallelSorterOpt$ #8: 107[ms]
    ShellParallelSorterOpt$ #9: 114[ms]
    ShellParallelSorterOpt$ #10: 144[ms]
    avg. 107[ms]
    ShellParallelSorterOpt2$ #1: 117[ms]
    ShellParallelSorterOpt2$ #2: 80[ms]
    ShellParallelSorterOpt2$ #3: 79[ms]
    ShellParallelSorterOpt2$ #4: 79[ms]
    ShellParallelSorterOpt2$ #5: 79[ms]
    ShellParallelSorterOpt2$ #6: 76[ms]
    ShellParallelSorterOpt2$ #7: 79[ms]
    ShellParallelSorterOpt2$ #8: 80[ms]
    ShellParallelSorterOpt2$ #9: 137[ms]
    ShellParallelSorterOpt2$ #10: 80[ms]
    avg. 80[ms]
    ShellParallelSorterThread$ #1: 133[ms]
    ShellParallelSorterThread$ #2: 119[ms]
    ShellParallelSorterThread$ #3: 123[ms]
    ShellParallelSorterThread$ #4: 120[ms]
    ShellParallelSorterThread$ #5: 122[ms]
    ShellParallelSorterThread$ #6: 121[ms]
    ShellParallelSorterOpt2$ #6: 80[ms]
    ShellParallelSorterOpt2$ #7: 78[ms]
    ShellParallelSorterOpt2$ #8: 81[ms]
    ShellParallelSorterOpt2$ #9: 77[ms]
    ShellParallelSorterOpt2$ #10: 78[ms]
    avg. 79[ms]
    ShellParallelSorterThread$ #1: 129[ms]
    ShellParallelSorterThread$ #2: 125[ms]
    ShellParallelSorterThread$ #3: 119[ms]
    ShellParallelSorterThread$ #4: 116[ms]
    ShellParallelSorterThread$ #5: 120[ms]
    ShellParallelSorterThread$ #6: 123[ms]
    ShellParallelSorterThread$ #7: 121[ms]
    ShellParallelSorterThread$ #8: 122[ms]
    ShellParallelSorterThread$ #9: 121[ms]
    ShellParallelSorterThread$ #10: 118[ms]
    ShellParallelSorterThread$ #9: 119[ms]
    ShellParallelSorterThread$ #10: 122[ms]
    avg. 121[ms]
    ShellParallelSorterThreadPool$ #1: 448[ms]
    ShellParallelSorterThreadPool$ #2: 397[ms]
    ShellParallelSorterThreadPool$ #3: 389[ms]
    ShellParallelSorterThreadPool$ #4: 487[ms]
    ShellParallelSorterThreadPool$ #5: 473[ms]
    ShellParallelSorterThreadPool$ #6: 389[ms]
    ShellParallelSorterThreadPool$ #7: 396[ms]
    ShellParallelSorterThreadPool$ #8: 398[ms]
    ShellParallelSorterThreadPool$ #9: 404[ms]
    ShellParallelSorterThreadPool$ #10: 397[ms]
    avg. 407[ms]
    QuickSeqSorter$ #1: 124[ms]
    QuickSeqSorter$ #2: 122[ms]
    QuickSeqSorter$ #3: 121[ms]
    QuickSeqSorter$ #4: 121[ms]
    QuickSeqSorter$ #5: 122[ms]
    QuickSeqSorter$ #6: 121[ms]
    QuickSeqSorter$ #7: 121[ms]
    QuickSeqSorter$ #8: 122[ms]
    QuickSeqSorter$ #9: 121[ms]
    QuickSeqSorter$ #10: 124[ms]
    avg. 122[ms]
    QuickSeqQueueSorter$ #1: 131[ms]
    QuickSeqQueueSorter$ #2: 126[ms]
    QuickSeqQueueSorter$ #3: 125[ms]
    QuickSeqQueueSorter$ #4: 124[ms]
    QuickSeqQueueSorter$ #5: 124[ms]
    QuickSeqQueueSorter$ #6: 123[ms]
    QuickSeqQueueSorter$ #7: 126[ms]
    QuickSeqQueueSorter$ #8: 123[ms]
    QuickSeqQueueSorter$ #9: 124[ms]
    QuickSeqQueueSorter$ #10: 124[ms]
    avg. 124[ms]
    QuickParallelSorter$ #1: 34[ms]
    QuickParallelSorter$ #2: 32[ms]
    QuickParallelSorter$ #3: 33[ms]
    QuickParallelSorter$ #4: 31[ms]
    QuickParallelSorter$ #5: 28[ms]
    QuickParallelSorter$ #6: 31[ms]
    QuickParallelSorter$ #7: 29[ms]
    QuickParallelSorter$ #8: 29[ms]
    QuickParallelSorter$ #9: 31[ms]
    QuickParallelSorter$ #10: 29[ms]
    avg. 31[ms]
    ParallelSorter #1: 36[ms]
    ParallelSorter #2: 140[ms]
    ParallelSorter #3: 29[ms]
    ParallelSorter #4: 32[ms]
    ParallelSorter #5: 30[ms]
    ParallelSorter #6: 142[ms]
    ParallelSorter #7: 29[ms]
    ParallelSorter #8: 28[ms]
    ParallelSorter #9: 29[ms]
    ParallelSorter #10: 141[ms]
    avg. 50[ms]
  4. @akihiro4chawon akihiro4chawon revised this gist May 19, 2011. 1 changed file with 78 additions and 0 deletions.
    78 changes: 78 additions & 0 deletions zBenchmarkResult.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,78 @@
    // @ Core i7 920
    SimpleSorter$ #1: 154[ms]
    SimpleSorter$ #2: 136[ms]
    SimpleSorter$ #3: 146[ms]
    SimpleSorter$ #4: 138[ms]
    SimpleSorter$ #5: 138[ms]
    SimpleSorter$ #6: 138[ms]
    SimpleSorter$ #7: 140[ms]
    SimpleSorter$ #8: 137[ms]
    SimpleSorter$ #9: 138[ms]
    SimpleSorter$ #10: 138[ms]
    avg. 138[ms]
    DivideAndMergeParallelSorter$ #1: 108[ms]
    DivideAndMergeParallelSorter$ #2: 75[ms]
    DivideAndMergeParallelSorter$ #3: 84[ms]
    DivideAndMergeParallelSorter$ #4: 76[ms]
    DivideAndMergeParallelSorter$ #5: 150[ms]
    DivideAndMergeParallelSorter$ #6: 80[ms]
    DivideAndMergeParallelSorter$ #7: 76[ms]
    DivideAndMergeParallelSorter$ #8: 76[ms]
    DivideAndMergeParallelSorter$ #9: 75[ms]
    DivideAndMergeParallelSorter$ #10: 167[ms]
    avg. 83[ms]
    DivideAndMergeParallelSorter2$ #1: 57[ms]
    DivideAndMergeParallelSorter2$ #2: 44[ms]
    DivideAndMergeParallelSorter2$ #3: 86[ms]
    DivideAndMergeParallelSorter2$ #4: 51[ms]
    DivideAndMergeParallelSorter2$ #5: 79[ms]
    DivideAndMergeParallelSorter2$ #6: 42[ms]
    DivideAndMergeParallelSorter2$ #7: 61[ms]
    DivideAndMergeParallelSorter2$ #8: 40[ms]
    DivideAndMergeParallelSorter2$ #9: 50[ms]
    DivideAndMergeParallelSorter2$ #10: 87[ms]
    avg. 57[ms]
    ShellParallelSorter$ #1: 171[ms]
    ShellParallelSorter$ #2: 151[ms]
    ShellParallelSorter$ #3: 202[ms]
    ShellParallelSorter$ #4: 156[ms]
    ShellParallelSorter$ #5: 151[ms]
    ShellParallelSorter$ #6: 202[ms]
    ShellParallelSorter$ #7: 152[ms]
    ShellParallelSorter$ #8: 145[ms]
    ShellParallelSorter$ #9: 204[ms]
    ShellParallelSorter$ #10: 151[ms]
    avg. 164[ms]
    ShellParallelSorterOpt$ #1: 92[ms]
    ShellParallelSorterOpt$ #2: 85[ms]
    ShellParallelSorterOpt$ #3: 117[ms]
    ShellParallelSorterOpt$ #4: 87[ms]
    ShellParallelSorterOpt$ #5: 173[ms]
    ShellParallelSorterOpt$ #6: 90[ms]
    ShellParallelSorterOpt$ #7: 85[ms]
    ShellParallelSorterOpt$ #8: 117[ms]
    ShellParallelSorterOpt$ #9: 85[ms]
    ShellParallelSorterOpt$ #10: 175[ms]
    avg. 98[ms]
    ShellParallelSorterOpt2$ #1: 79[ms]
    ShellParallelSorterOpt2$ #2: 83[ms]
    ShellParallelSorterOpt2$ #3: 81[ms]
    ShellParallelSorterOpt2$ #4: 82[ms]
    ShellParallelSorterOpt2$ #5: 79[ms]
    ShellParallelSorterOpt2$ #6: 76[ms]
    ShellParallelSorterOpt2$ #7: 79[ms]
    ShellParallelSorterOpt2$ #8: 80[ms]
    ShellParallelSorterOpt2$ #9: 137[ms]
    ShellParallelSorterOpt2$ #10: 80[ms]
    avg. 80[ms]
    ShellParallelSorterThread$ #1: 133[ms]
    ShellParallelSorterThread$ #2: 119[ms]
    ShellParallelSorterThread$ #3: 123[ms]
    ShellParallelSorterThread$ #4: 120[ms]
    ShellParallelSorterThread$ #5: 122[ms]
    ShellParallelSorterThread$ #6: 121[ms]
    ShellParallelSorterThread$ #7: 121[ms]
    ShellParallelSorterThread$ #8: 122[ms]
    ShellParallelSorterThread$ #9: 121[ms]
    ShellParallelSorterThread$ #10: 118[ms]
    avg. 121[ms]
  5. @akihiro4chawon akihiro4chawon created this gist May 19, 2011.
    235 changes: 235 additions & 0 deletions parasort.scala
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,235 @@
    import java.util.Arrays

    abstract class Sorter {
    def sorted(a: Array[Int]): Array[Int]
    }

    object SimpleSorter extends Sorter {
    def sorted(a: Array[Int]) = {
    Arrays.sort(a)
    a
    }
    }

    object DivideAndMergeParallelSorter extends Sorter {
    def sorted(a: Array[Int]) = {
    require(a.length >= 2)
    import scala.annotation.tailrec
    import scala.concurrent.ops._
    val len = a.length
    val half = len / 2
    par(Arrays.sort(a, 0, half), Arrays.sort(a, half, len))

    val ret = new Array[Int](a.length)
    @tailrec
    def merge(i: Int, j: Int, k: Int) {
    if (a(j) <= a(k)) {
    ret(i) = a(j)
    if (j < half - 1) merge(i + 1, j + 1, k)
    else System.arraycopy(a, k, ret, i + 1, len - k)
    } else {
    ret(i) = a(k)
    if (k < len - 1) merge(i + 1, j, k + 1)
    else System.arraycopy(a, j, ret, i + 1, half - j)
    }
    }
    merge(0, 0, half)
    ret
    }
    }

    object DivideAndMergeParallelSorter2 extends Sorter {
    def sorted(a: Array[Int]) = {
    require(a.length >= scala.collection.parallel.availableProcessors)
    import scala.annotation.tailrec
    import scala.concurrent.ops._

    val nDiv = collection.parallel.availableProcessors
    val len = a.length
    val pslices = (0 until nDiv).par map {i => Arrays.copyOfRange(a, i * len / nDiv, (i + 1) * len / nDiv)}
    pslices foreach (Arrays.sort _)

    def merge(a: Array[Int], b: Array[Int]): Array[Int] = {
    val alen = a.length
    val blen = b.length
    val ret = new Array[Int](alen + blen);
    @tailrec def rec(i: Int, j: Int, k: Int) {
    if (a(j) <= b(k)) {
    ret(i) = a(j)
    if (j < alen - 1) rec(i + 1, j + 1, k)
    else System.arraycopy(b, k, ret, i + 1, blen - k)
    } else {
    ret(i) = b(k)
    if (k < blen - 1) rec(i + 1, j, k + 1)
    else System.arraycopy(a, j, ret, i + 1, alen - j)
    }
    }
    rec(0, 0, 0)
    ret
    }
    pslices reduce merge
    }
    }

    object ShellParallelSorter extends Sorter {
    def sorted(a: Array[Int]) = {
    val hInit = (Iterator.iterate(1)(_ * 3 + 1) find (a.length <) get) / 3
    for (h <- Iterator.iterate(hInit)(_ / 3) takeWhile (1 <=)) {
    for (k <- 0 until h par) {
    var i = k - h; while ({i += h; i < a.size}) {
    // for (i <- k until a.size by h) {
    val v = a(i)
    var j = i
    while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - h)
    j -= h
    }
    a(j) = v
    }
    }
    }
    a
    }
    }

    object ShellParallelSorterOpt extends Sorter {
    def sorted(a: Array[Int]) = {
    val len = a.length
    var h = 1; while (h < len) {h *= 3; h += 1}
    while ({h /= 3; h >= 1}) {
    for (k <- 0 until h par) {
    var i = k - h; while ({i += h; i < len}) {
    val v = a(i)
    var j = i
    while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - h)
    j -= h
    }
    a(j) = v
    }
    }
    }
    a
    }
    }

    object ShellParallelSorterOpt2 extends Sorter {
    def sorted(a: Array[Int]) = {
    val len = a.length
    var h = 1; while (h < len) {h *= 3; h += 1}
    h/=3;h/=3
    while ({h /= 3; h > 1}) {
    for (k <- 0 until h par) {
    var i = k - h; while ({i += h; i < len}) {
    val v = a(i)
    var j = i
    while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - h)
    j -= h
    }
    a(j) = v
    }
    }
    }
    var i = 0; while ({i += 1; i < len}) {
    val v = a(i)
    var j = i; while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - 1)
    j -= 1
    }
    a(j) = v
    }
    a
    }
    }

    object ShellParallelSorterThread extends Sorter with Runnable {
    val count = new java.util.concurrent.atomic.AtomicInteger
    var h: Int = 1
    var a: Array[Int] = null

    def run() {
    var i: Int = 0
    val len = this.a.length
    val a = this.a
    val h = this.h
    while ({i = count.getAndDecrement; i >= 0}) {
    i -= h; while ({i += h; i < len}) {
    val v = a(i)
    var j = i
    while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - h)
    j -= h
    }
    a(j) = v
    }
    }
    }
    def sorted(a: Array[Int]) = {
    this.a = a
    val len = a.length
    while (h < len) {h *= 3; h += 1}

    h /= 3;
    while ({h /= 3; h > 1}) {
    count.set(h - 1)
    val nThreads = (Runtime.getRuntime.availableProcessors - 1) min (h - 1)
    val pool = Array.fill(nThreads)(new Thread(this))
    pool foreach {_.start}
    run
    pool foreach {_.join}
    }

    var i = 0; while ({i += 1; i < len}) {
    val v = a(i)
    var j = i; while (j >= h && (a(j - h) > v)) {
    a(j) = a(j - 1)
    j -= 1
    }
    a(j) = v
    }
    a
    }
    }

    object Main extends App {
    import scala.collection.mutable.WrappedArray
    import scala.util.Random

    object RandomSource {
    private val src: WrappedArray[Int] = Array.range(0, 1000000)
    def getShuffled = scala.util.Random.shuffle(src).toArray
    }

    val impls = Seq(
    SimpleSorter,
    DivideAndMergeParallelSorter,
    DivideAndMergeParallelSorter2,
    ShellParallelSorter,
    ShellParallelSorterOpt,
    ShellParallelSorterOpt2,
    ShellParallelSorterThread)

    check(impls.head, impls.tail :_*)
    impls foreach { sorter =>
    System.gc
    val times = for (i <- 1 to 10) yield benchmark(sorter, i)
    val noOutliers = times.sorted.drop(2).reverse.drop(2)
    println("avg. " + noOutliers.sum / noOutliers.size / 1000000 + "[ms]")
    }

    def benchmark(sorter: Sorter, i: Int) = {
    val r = RandomSource.getShuffled
    val t1 = System.nanoTime
    sorter.sorted(r)
    val t = System.nanoTime - t1
    println(sorter.getClass.getName+" #"+i+": "+((t)/1000000)+"[ms]")
    t
    }

    def check(refSorter: Sorter, sorters: Sorter*) {
    val r = RandomSource.getShuffled
    val a = refSorter.sorted(r.clone)
    sorters foreach {s => assert(Arrays.equals(s.sorted(r.clone), a))}
    }
    }