bbvch13531 · September 11, 2021 08:09 · bbvch13531 · Sep 11, 2021
diff --git a/ConcurrentPerformBatch.swift b/ConcurrentPerformBatch.swift
 import Foundation

 /// Monte Carlo benchmark that samples random points in the 10 x 10 square and
 /// counts those inside region S, using `DispatchQueue.concurrentPerform` with
 /// one sample per iteration and a semaphore around the shared counter.
 class ConcurrentPerformBatchExample {
    /// True when (x, y) lies strictly outside the circle of radius 5 centred at (0, 5).
    func isSatisfingF1(x: Double, y: Double) -> Bool {
        let dy = y - 5
        return x * x + dy * dy > 5 * 5
    }

    /// True when (x, y) lies strictly inside the circle of radius 10 centred at the origin.
    func isSatisfingF2(x: Double, y: Double) -> Bool {
        let squaredDistance = (x * x) + (y * y)
        return squaredDistance < 10 * 10
    }

    /// True when (x, y) lies strictly inside the circle of radius 5 centred at (5, 10).
    func isSatisfingF3(x: Double, y: Double) -> Bool {
        let dx = x - 5
        let dy = y - 10
        return dx * dx + dy * dy < 5 * 5
    }

    /// True when all three constraints hold; later checks are skipped once one fails.
    func isInsideS(x: Double, y: Double) -> Bool {
        guard isSatisfingF1(x: x, y: y) else { return false }
        guard isSatisfingF2(x: x, y: y) else { return false }
        return isSatisfingF3(x: x, y: y)
    }

    /// Number of random samples drawn by `start()`.
    static let iterations = 100_000_000
    /// Number of samples that landed inside S; guarded by `sem`.
    var count = 0
    let dispatchQueueConcurrent = DispatchQueue(label: "worker1", attributes: .concurrent)
    let dispatchQueueSerial = DispatchQueue(label: "worker2")
    /// Binary semaphore used as a mutex around `count`.
    var sem = DispatchSemaphore(value: 1)

    /// Runs the simulation, prints progress, the hit count, `S` and the elapsed
    /// time in milliseconds, then terminates the process.
    func start() {
        let began = DispatchTime.now()
        let total = ConcurrentPerformBatchExample.iterations
        let onePercent = total / 100
        DispatchQueue.concurrentPerform(iterations: total) { [weak self] index in
            guard let strongSelf = self else { return }
            let x = Double.random(in: 0.0...10.0)
            let y = Double.random(in: 0.0...10.0)

            if strongSelf.isInsideS(x: x, y: y) {
                // Serialise the increment; waits without a deadline.
                strongSelf.sem.wait()
                strongSelf.count += 1
                strongSelf.sem.signal()
            }

            // Report once per percent of the index range.
            if index % onePercent == 0 {
                print("\(index / onePercent) / 100")
            }
        }
        let finished = DispatchTime.now()
        let elapsedNanoseconds = finished.uptimeNanoseconds - began.uptimeNanoseconds
        let elapsedMilliseconds = Double(elapsedNanoseconds) / 1_000_000
        let percentage = Double(count) * 100 / Double(total)

        print("count = \(count)")
        print("S = \(percentage)")
        print("time = \(elapsedMilliseconds) milsec")
        exit(EXIT_SUCCESS)
    }
 }
 /*
 iterations = 10_000_000
 count = 977229
 S = 9.77229
 time = 10743.776137
 
 iterations = 100_000_000
 count = 9776076
 S = 9.776076
 time = 148231.200488
 */
diff --git a/GPUComputationMTL.swift b/GPUComputationMTL.swift
 import Foundation
 import Metal
 import MetalKit

 /// Runs the Monte Carlo estimate on the GPU by dispatching the `isInsideS`
 /// Metal compute kernel with one thread per sample.
 class GPUComputationMTL {

    /// Total number of samples; one GPU thread is dispatched per sample.
    static let iterations = 100_000_000

    /// Threads per threadgroup. The threadgroup count is `iterations` divided by
    /// this value, so it has to divide `iterations` exactly.
    private static let threadsPerThreadgroup = 1000

    /// Set when `start()` begins, so Metal setup is part of the measured time.
    var startTime: DispatchTime?
    /// Set by the command buffer's completion handler.
    var endTime: DispatchTime?

    /// Builds the compute pipeline, runs the kernel once, waits for it and prints
    /// the hit count, `S` (hits * 100 / iterations) and the elapsed milliseconds.
    /// Every failure path prints its reason before returning.
    func start() {
        startTime = DispatchTime.now()
        endTime = nil

        guard let device = MTLCreateSystemDefaultDevice() else {
            print("Failed to create MTLdevice")
            return
        }
        guard let defaultLibrary = device.makeDefaultLibrary(),
              let isInsideSFunc = defaultLibrary.makeFunction(name: "isInsideS") else {
            print("Failed to load the isInsideS function from the default Metal library")
            return
        }
        guard let commandQueue = device.makeCommandQueue() else {
            print("Failed to create a Metal command queue")
            return
        }

        let pipelineState: MTLComputePipelineState
        do {
            pipelineState = try device.makeComputePipelineState(function: isInsideSFunc)
        } catch {
            print("Failed to create the compute pipeline state: \(error)")
            return
        }

        // The pipeline reports how many threads a threadgroup may hold for this kernel.
        let groupWidth = GPUComputationMTL.threadsPerThreadgroup
        guard groupWidth <= pipelineState.maxTotalThreadsPerThreadgroup else {
            print("Threadgroup size \(groupWidth) exceeds the pipeline limit of \(pipelineState.maxTotalThreadsPerThreadgroup)")
            return
        }

        // Allocate both buffers before an encoder exists, so an allocation failure
        // can return early without leaving an encoder open.
        var counter: Int32 = 0
        guard let bufferOut = device.makeBuffer(length: MemoryLayout<Bool>.size * GPUComputationMTL.iterations,
                                                options: .storageModeShared),
              let bufferCounter = device.makeBuffer(bytes: &counter,
                                                    length: MemoryLayout<Int32>.size,
                                                    options: .storageModeShared) else {
            print("Failed to allocate Metal buffers")
            return
        }

        guard let commandBuffer = commandQueue.makeCommandBuffer(),
              let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
            print("Failed to create a Metal command buffer or compute encoder")
            return
        }

        computeEncoder.setComputePipelineState(pipelineState)
        computeEncoder.setBuffer(bufferOut, offset: 0, index: 0)
        computeEncoder.setBuffer(bufferCounter, offset: 0, index: 1)

        let gridSize = MTLSizeMake(GPUComputationMTL.iterations / groupWidth, 1, 1)
        let threadGroupSize = MTLSizeMake(groupWidth, 1, 1)
        computeEncoder.dispatchThreadgroups(gridSize, threadsPerThreadgroup: threadGroupSize)

        computeEncoder.endEncoding()
        commandBuffer.addCompletedHandler { _ in
            self.endTime = DispatchTime.now()
        }

        commandBuffer.commit()
        commandBuffer.waitUntilCompleted()

        // A failed command buffer leaves the counter meaningless; report it instead.
        if let error = commandBuffer.error {
            print("GPU execution failed: \(error)")
            return
        }

        let count = bufferCounter.contents().load(as: Int32.self)

        print("count = \(count)")
        print("S = \(Double(count) * 100 / Double(GPUComputationMTL.iterations))")

        guard let startTime = startTime else { return }
        // Fall back to "now" if the completion handler has not stored a timestamp.
        let endTime = self.endTime ?? DispatchTime.now()

        // Double keeps the nanosecond difference exact at these magnitudes.
        let nanoSeconds = endTime.uptimeNanoseconds - startTime.uptimeNanoseconds
        let intervalTime = Double(nanoSeconds) / 1_000_000
        print("time = \(intervalTime) milsec")
    }
 }

 /*
 GPUComputationMTL
 iterations = 100_000_000
 count = 9774663
 S = 9.774663
 time = 247.94894 milsec
 */
diff --git a/isInsideS.metal b/isInsideS.metal
 //
 //  isInsideS.metal
 //  MonteCarloSimulation1
 //
 //  Created by KyungYoung Heo on 2021/09/11.
 //

 #include <metal_stdlib>
 using namespace metal;

 // Returns true when (x, y) lies strictly outside the circle of radius 5 centred at (0, 5).
 bool isSatisfingF1(float x, float y) {
    return x * x + (y - 5) * (y - 5) > 5 * 5;
 }

 // Returns true when (x, y) lies strictly inside the circle of radius 10 centred at the origin.
 bool isSatisfingF2(float x, float y) {
    return (x * x) + (y * y) < 10 * 10;
 }

 // Returns true when (x, y) lies strictly inside the circle of radius 5 centred at (5, 10).
 bool isSatisfingF3(float x, float y) {
    return (x - 5) * (x - 5) + (y - 10) * (y - 10) < 5 * 5;
 }

 //float rand(int i){
 //    return fract(sin(dot(i, float2(12.9898, 78.233))) * 43758.5453);
 //}


 // One linear-congruential step: state * 1664525 + 1013904223 in unsigned 32-bit arithmetic.
 uint rand_lcg(uint rng_state) {
    const uint multiplier = 1664525u;
    const uint increment = 1013904223u;
    return multiplier * rng_state + increment;
 }

 // Xorshift step (George Marsaglia's algorithm) with the shift triple 13 / 17 / 5.
 uint rand_xorshift(uint rng_state) {
    uint mixed = rng_state;
    mixed = mixed ^ (mixed << 13);
    mixed = mixed ^ (mixed >> 17);
    mixed = mixed ^ (mixed << 5);
    return mixed;
 }

 // Compute kernel, one sample per thread: derives a point (x, y) from the thread
 // index, records in out[index] whether it satisfies all three constraints and
 // atomically increments *counter for every hit.
 kernel void isInsideS(
            device bool* out,
            device atomic_uint* counter,
            uint index [[thread_position_in_grid]]
            ) {

    // Two chained LCG states seeded from the thread index, one per coordinate.
    const uint state_x = rand_lcg(index);
    const uint state_y = rand_lcg(state_x);

    // Scale each 32-bit value by 2^-32 and then by 10.
    float x = float(rand_xorshift(state_x)) * (1.0 / 4294967296.0) * 10;
    float y = float(rand_xorshift(state_y)) * (1.0 / 4294967296.0) * 10;

    const bool inside = isSatisfingF1(x, y) && isSatisfingF2(x, y) && isSatisfingF3(x, y);
    out[index] = inside;
    if (inside) {
        atomic_fetch_add_explicit(counter, 1, memory_order_relaxed);
    }
 }
diff --git a/main.swift b/main.swift
 //
 //  main.swift
 //  MonteCarloSimulation1
 //
 //  Created by KyungYoung Heo on 2021/09/10.
 //

 import Foundation

 //let semaphoreExample = SemaphoreExample()
 //semaphoreExample.start()


 //let concurrentPerformBatchExample = ConcurrentPerformBatchExample()
 //concurrentPerformBatchExample.start()
 //RunLoop.main.run()


 // Entry point: run the Metal (GPU) variant. The semaphore and concurrentPerform
 // variants are left commented out above.
 let gPUComputationMTL = GPUComputationMTL()
 gPUComputationMTL.start()

 /*
 DispatchQueue.global().async
 count = 9768064
 S = 9.768064
 time = 967947.016009 milsec

 DispatchQueue.concurrentPerform
 count = 9776076
 S = 9.776076
 time = 148231.200488 milsec
 
 GPUComputationMTL
 count = 9774663
 S = 9.774663
 time = 247.94894 milsec
 */
diff --git a/SemaphoreExample.swift b/SemaphoreExample.swift
 import Foundation

 /// Monte Carlo benchmark that submits one asynchronous work item per sample to
 /// a concurrent queue and protects the shared counter with a semaphore.
 class SemaphoreExample {
    /// True when (x, y) lies strictly outside the circle of radius 5 centred at (0, 5).
    func isSatisfingF1(x: Double, y: Double) -> Bool {
        return x * x + (y - 5) * (y - 5) > 5 * 5
    }

    /// True when (x, y) lies strictly inside the circle of radius 10 centred at the origin.
    func isSatisfingF2(x: Double, y: Double) -> Bool {
        return (x * x) + (y * y) < 10 * 10
    }

    /// True when (x, y) lies strictly inside the circle of radius 5 centred at (5, 10).
    func isSatisfingF3(x: Double, y: Double) -> Bool {
        return (x - 5) * (x - 5) + (y - 10) * (y - 10) < 5 * 5
    }

    /// True when the point satisfies all three constraints.
    func isInsideS(x: Double, y: Double) -> Bool {
        return isSatisfingF1(x: x, y: y) && isSatisfingF2(x: x, y: y) && isSatisfingF3(x: x, y: y)
    }

    /// Number of random samples drawn by `start()`.
    static let iterations = 100_000_000
    /// Number of samples that landed inside S; guarded by `sem`.
    var count = 0
    let dispatchQueueConcurrent = DispatchQueue.init(label: "worker1", attributes: .concurrent)
    let dispatchQueueSerial = DispatchQueue.init(label: "worker2")
    /// Binary semaphore used as a mutex around `count`.
    var sem = DispatchSemaphore(value: 1)
    /// Tracks every submitted work item so `start()` can wait for all of them.
    let group = DispatchGroup()

    /// Submits all samples, waits for them to finish, prints the hit count, `S`
    /// and the elapsed time in milliseconds, then terminates the process.
    func start() {
        let startTime = DispatchTime.now()
        for i in 0..<SemaphoreExample.iterations {
            dispatchQueueConcurrent.async(group: group) { [weak self] in
                guard let self = self else { return }
                let x = Double.random(in: 0.0...10.0)
                let y = Double.random(in: 0.0...10.0)

                if self.isInsideS(x: x, y: y) {
                    self.sem.wait()
                    self.count += 1
                    self.sem.signal()
                }

                if i % (SemaphoreExample.iterations / 100) == 0 {
                    print("\(i / (SemaphoreExample.iterations / 100)) / 100")
                }
            }
        }
        // Without this wait the timer would stop, `count` would be read and the
        // process would exit while work items are still queued or running.
        group.wait()
        let endTime = DispatchTime.now()
        let nanoSeconds = endTime.uptimeNanoseconds - startTime.uptimeNanoseconds
        let intervalTime = Double(nanoSeconds) / 1_000_000

        print("count = \(count)")
        print("S = \(Double(count) * 100 / Double(SemaphoreExample.iterations))")
        print("time = \(intervalTime) milsec")
        exit(EXIT_SUCCESS)
    }
 }
 /*
 DispatchQueue.global().async
 iterations = 10_000_000
 count = 978294
 S = 9.78294
 time = 43135.978637
 
 iterations = 100_000_000
 count = 9768064
 S = 9.768064
 time = 967947.016009
 */
	import Foundation

	/// Monte Carlo benchmark that samples random points in the 10 x 10 square and
	/// counts those inside region S, using `DispatchQueue.concurrentPerform` with
	/// one sample per iteration and a semaphore around the shared counter.
	class ConcurrentPerformBatchExample {
	    /// True when (x, y) lies strictly outside the circle of radius 5 centred at (0, 5).
	    func isSatisfingF1(x: Double, y: Double) -> Bool {
	        let dy = y - 5
	        return x * x + dy * dy > 5 * 5
	    }

	    /// True when (x, y) lies strictly inside the circle of radius 10 centred at the origin.
	    func isSatisfingF2(x: Double, y: Double) -> Bool {
	        let squaredDistance = (x * x) + (y * y)
	        return squaredDistance < 10 * 10
	    }

	    /// True when (x, y) lies strictly inside the circle of radius 5 centred at (5, 10).
	    func isSatisfingF3(x: Double, y: Double) -> Bool {
	        let dx = x - 5
	        let dy = y - 10
	        return dx * dx + dy * dy < 5 * 5
	    }

	    /// True when all three constraints hold; later checks are skipped once one fails.
	    func isInsideS(x: Double, y: Double) -> Bool {
	        guard isSatisfingF1(x: x, y: y) else { return false }
	        guard isSatisfingF2(x: x, y: y) else { return false }
	        return isSatisfingF3(x: x, y: y)
	    }

	    /// Number of random samples drawn by `start()`.
	    static let iterations = 100_000_000
	    /// Number of samples that landed inside S; guarded by `sem`.
	    var count = 0
	    let dispatchQueueConcurrent = DispatchQueue(label: "worker1", attributes: .concurrent)
	    let dispatchQueueSerial = DispatchQueue(label: "worker2")
	    /// Binary semaphore used as a mutex around `count`.
	    var sem = DispatchSemaphore(value: 1)

	    /// Runs the simulation, prints progress, the hit count, `S` and the elapsed
	    /// time in milliseconds, then terminates the process.
	    func start() {
	        let began = DispatchTime.now()
	        let total = ConcurrentPerformBatchExample.iterations
	        let onePercent = total / 100
	        DispatchQueue.concurrentPerform(iterations: total) { [weak self] index in
	            guard let strongSelf = self else { return }
	            let x = Double.random(in: 0.0...10.0)
	            let y = Double.random(in: 0.0...10.0)

	            if strongSelf.isInsideS(x: x, y: y) {
	                // Serialise the increment; waits without a deadline.
	                strongSelf.sem.wait()
	                strongSelf.count += 1
	                strongSelf.sem.signal()
	            }

	            // Report once per percent of the index range.
	            if index % onePercent == 0 {
	                print("\(index / onePercent) / 100")
	            }
	        }
	        let finished = DispatchTime.now()
	        let elapsedNanoseconds = finished.uptimeNanoseconds - began.uptimeNanoseconds
	        let elapsedMilliseconds = Double(elapsedNanoseconds) / 1_000_000
	        let percentage = Double(count) * 100 / Double(total)

	        print("count = \(count)")
	        print("S = \(percentage)")
	        print("time = \(elapsedMilliseconds) milsec")
	        exit(EXIT_SUCCESS)
	    }
	}
	/*
	iterations = 10_000_000
	count = 977229
	S = 9.77229
	time = 10743.776137

	iterations = 100_000_000
	count = 9776076
	S = 9.776076
	time = 148231.200488
	*/
	import Foundation
	import Metal
	import MetalKit

	/// Runs the Monte Carlo estimate on the GPU by dispatching the `isInsideS`
	/// Metal compute kernel with one thread per sample.
	class GPUComputationMTL {

	    /// Total number of samples; one GPU thread is dispatched per sample.
	    static let iterations = 100_000_000

	    /// Threads per threadgroup. The threadgroup count is `iterations` divided by
	    /// this value, so it has to divide `iterations` exactly.
	    private static let threadsPerThreadgroup = 1000

	    /// Set when `start()` begins, so Metal setup is part of the measured time.
	    var startTime: DispatchTime?
	    /// Set by the command buffer's completion handler.
	    var endTime: DispatchTime?

	    /// Builds the compute pipeline, runs the kernel once, waits for it and prints
	    /// the hit count, `S` (hits * 100 / iterations) and the elapsed milliseconds.
	    /// Every failure path prints its reason before returning.
	    func start() {
	        startTime = DispatchTime.now()
	        endTime = nil

	        guard let device = MTLCreateSystemDefaultDevice() else {
	            print("Failed to create MTLdevice")
	            return
	        }
	        guard let defaultLibrary = device.makeDefaultLibrary(),
	              let isInsideSFunc = defaultLibrary.makeFunction(name: "isInsideS") else {
	            print("Failed to load the isInsideS function from the default Metal library")
	            return
	        }
	        guard let commandQueue = device.makeCommandQueue() else {
	            print("Failed to create a Metal command queue")
	            return
	        }

	        let pipelineState: MTLComputePipelineState
	        do {
	            pipelineState = try device.makeComputePipelineState(function: isInsideSFunc)
	        } catch {
	            print("Failed to create the compute pipeline state: \(error)")
	            return
	        }

	        // The pipeline reports how many threads a threadgroup may hold for this kernel.
	        let groupWidth = GPUComputationMTL.threadsPerThreadgroup
	        guard groupWidth <= pipelineState.maxTotalThreadsPerThreadgroup else {
	            print("Threadgroup size \(groupWidth) exceeds the pipeline limit of \(pipelineState.maxTotalThreadsPerThreadgroup)")
	            return
	        }

	        // Allocate both buffers before an encoder exists, so an allocation failure
	        // can return early without leaving an encoder open.
	        var counter: Int32 = 0
	        guard let bufferOut = device.makeBuffer(length: MemoryLayout<Bool>.size * GPUComputationMTL.iterations,
	                                                options: .storageModeShared),
	              let bufferCounter = device.makeBuffer(bytes: &counter,
	                                                    length: MemoryLayout<Int32>.size,
	                                                    options: .storageModeShared) else {
	            print("Failed to allocate Metal buffers")
	            return
	        }

	        guard let commandBuffer = commandQueue.makeCommandBuffer(),
	              let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
	            print("Failed to create a Metal command buffer or compute encoder")
	            return
	        }

	        computeEncoder.setComputePipelineState(pipelineState)
	        computeEncoder.setBuffer(bufferOut, offset: 0, index: 0)
	        computeEncoder.setBuffer(bufferCounter, offset: 0, index: 1)

	        let gridSize = MTLSizeMake(GPUComputationMTL.iterations / groupWidth, 1, 1)
	        let threadGroupSize = MTLSizeMake(groupWidth, 1, 1)
	        computeEncoder.dispatchThreadgroups(gridSize, threadsPerThreadgroup: threadGroupSize)

	        computeEncoder.endEncoding()
	        commandBuffer.addCompletedHandler { _ in
	            self.endTime = DispatchTime.now()
	        }

	        commandBuffer.commit()
	        commandBuffer.waitUntilCompleted()

	        // A failed command buffer leaves the counter meaningless; report it instead.
	        if let error = commandBuffer.error {
	            print("GPU execution failed: \(error)")
	            return
	        }

	        let count = bufferCounter.contents().load(as: Int32.self)

	        print("count = \(count)")
	        print("S = \(Double(count) * 100 / Double(GPUComputationMTL.iterations))")

	        guard let startTime = startTime else { return }
	        // Fall back to "now" if the completion handler has not stored a timestamp.
	        let endTime = self.endTime ?? DispatchTime.now()

	        // Double keeps the nanosecond difference exact at these magnitudes.
	        let nanoSeconds = endTime.uptimeNanoseconds - startTime.uptimeNanoseconds
	        let intervalTime = Double(nanoSeconds) / 1_000_000
	        print("time = \(intervalTime) milsec")
	    }
	}

	/*
	GPUComputationMTL
	iterations = 100_000_000
	count = 9774663
	S = 9.774663
	time = 247.94894 milsec
	*/
	//
	// isInsideS.metal
	// MonteCarloSimulation1
	//
	// Created by KyungYoung Heo on 2021/09/11.
	//

	#include <metal_stdlib>
	using namespace metal;

	// Returns true when (x, y) lies strictly outside the circle of radius 5 centred at (0, 5).
	bool isSatisfingF1(float x, float y) {
	    return x * x + (y - 5) * (y - 5) > 5 * 5;
	}

	// Returns true when (x, y) lies strictly inside the circle of radius 10 centred at the origin.
	bool isSatisfingF2(float x, float y) {
	    return (x * x) + (y * y) < 10 * 10;
	}

	// Returns true when (x, y) lies strictly inside the circle of radius 5 centred at (5, 10).
	bool isSatisfingF3(float x, float y) {
	    return (x - 5) * (x - 5) + (y - 10) * (y - 10) < 5 * 5;
	}

	//float rand(int i){
	// return fract(sin(dot(i, float2(12.9898, 78.233))) * 43758.5453);
	//}


	// One linear-congruential step: state * 1664525 + 1013904223 in unsigned 32-bit arithmetic.
	uint rand_lcg(uint rng_state) {
	    const uint multiplier = 1664525u;
	    const uint increment = 1013904223u;
	    return multiplier * rng_state + increment;
	}

	// Xorshift step (George Marsaglia's algorithm) with the shift triple 13 / 17 / 5.
	uint rand_xorshift(uint rng_state) {
	    uint mixed = rng_state;
	    mixed = mixed ^ (mixed << 13);
	    mixed = mixed ^ (mixed >> 17);
	    mixed = mixed ^ (mixed << 5);
	    return mixed;
	}

	// Compute kernel, one sample per thread: derives a point (x, y) from the thread
	// index, records in out[index] whether it satisfies all three constraints and
	// atomically increments *counter for every hit.
	kernel void isInsideS(
	    device bool* out,
	    device atomic_uint* counter,
	    uint index [[thread_position_in_grid]]
	    ) {

	    // Two chained LCG states seeded from the thread index, one per coordinate.
	    const uint state_x = rand_lcg(index);
	    const uint state_y = rand_lcg(state_x);

	    // Scale each 32-bit value by 2^-32 and then by 10.
	    float x = float(rand_xorshift(state_x)) * (1.0 / 4294967296.0) * 10;
	    float y = float(rand_xorshift(state_y)) * (1.0 / 4294967296.0) * 10;

	    const bool inside = isSatisfingF1(x, y) && isSatisfingF2(x, y) && isSatisfingF3(x, y);
	    out[index] = inside;
	    if (inside) {
	        atomic_fetch_add_explicit(counter, 1, memory_order_relaxed);
	    }
	}
	//
	// main.swift
	// MonteCarloSimulation1
	//
	// Created by KyungYoung Heo on 2021/09/10.
	//

	import Foundation

	//let semaphoreExample = SemaphoreExample()
	//semaphoreExample.start()


	//let concurrentPerformBatchExample = ConcurrentPerformBatchExample()
	//concurrentPerformBatchExample.start()
	//RunLoop.main.run()


	// Entry point: run the Metal (GPU) variant. The semaphore and concurrentPerform
	// variants are left commented out above.
	let gPUComputationMTL = GPUComputationMTL()
	gPUComputationMTL.start()

	/*
	DispatchQueue.global().async
	count = 9768064
	S = 9.768064
	time = 967947.016009 milsec

	DispatchQueue.concurrentPerform
	count = 9776076
	S = 9.776076
	time = 148231.200488 milsec

	GPUComputationMTL
	count = 9774663
	S = 9.774663
	time = 247.94894 milsec
	*/
	import Foundation

	/// Monte Carlo benchmark that submits one asynchronous work item per sample to
	/// a concurrent queue and protects the shared counter with a semaphore.
	class SemaphoreExample {
	    /// True when (x, y) lies strictly outside the circle of radius 5 centred at (0, 5).
	    func isSatisfingF1(x: Double, y: Double) -> Bool {
	        return x * x + (y - 5) * (y - 5) > 5 * 5
	    }

	    /// True when (x, y) lies strictly inside the circle of radius 10 centred at the origin.
	    func isSatisfingF2(x: Double, y: Double) -> Bool {
	        return (x * x) + (y * y) < 10 * 10
	    }

	    /// True when (x, y) lies strictly inside the circle of radius 5 centred at (5, 10).
	    func isSatisfingF3(x: Double, y: Double) -> Bool {
	        return (x - 5) * (x - 5) + (y - 10) * (y - 10) < 5 * 5
	    }

	    /// True when the point satisfies all three constraints.
	    func isInsideS(x: Double, y: Double) -> Bool {
	        return isSatisfingF1(x: x, y: y) && isSatisfingF2(x: x, y: y) && isSatisfingF3(x: x, y: y)
	    }

	    /// Number of random samples drawn by `start()`.
	    static let iterations = 100_000_000
	    /// Number of samples that landed inside S; guarded by `sem`.
	    var count = 0
	    let dispatchQueueConcurrent = DispatchQueue.init(label: "worker1", attributes: .concurrent)
	    let dispatchQueueSerial = DispatchQueue.init(label: "worker2")
	    /// Binary semaphore used as a mutex around `count`.
	    var sem = DispatchSemaphore(value: 1)
	    /// Tracks every submitted work item so `start()` can wait for all of them.
	    let group = DispatchGroup()

	    /// Submits all samples, waits for them to finish, prints the hit count, `S`
	    /// and the elapsed time in milliseconds, then terminates the process.
	    func start() {
	        let startTime = DispatchTime.now()
	        for i in 0..<SemaphoreExample.iterations {
	            dispatchQueueConcurrent.async(group: group) { [weak self] in
	                guard let self = self else { return }
	                let x = Double.random(in: 0.0...10.0)
	                let y = Double.random(in: 0.0...10.0)

	                if self.isInsideS(x: x, y: y) {
	                    self.sem.wait()
	                    self.count += 1
	                    self.sem.signal()
	                }

	                if i % (SemaphoreExample.iterations / 100) == 0 {
	                    print("\(i / (SemaphoreExample.iterations / 100)) / 100")
	                }
	            }
	        }
	        // Without this wait the timer would stop, `count` would be read and the
	        // process would exit while work items are still queued or running.
	        group.wait()
	        let endTime = DispatchTime.now()
	        let nanoSeconds = endTime.uptimeNanoseconds - startTime.uptimeNanoseconds
	        let intervalTime = Double(nanoSeconds) / 1_000_000

	        print("count = \(count)")
	        print("S = \(Double(count) * 100 / Double(SemaphoreExample.iterations))")
	        print("time = \(intervalTime) milsec")
	        exit(EXIT_SUCCESS)
	    }
	}
	/*
	DispatchQueue.global().async
	iterations = 10_000_000
	count = 978294
	S = 9.78294
	time = 43135.978637

	iterations = 100_000_000
	count = 9768064
	S = 9.768064
	time = 967947.016009
	*/