Last active
June 5, 2020 18:28
-
-
Save prufrock/f1f3e5ba6e28eea817d92fa82d3d7452 to your computer and use it in GitHub Desktop.
I converted this Apple Metal tutorial, https://developer.apple.com/documentation/metal/basic_tasks_and_concepts/performing_calculations_on_a_gpu?preferredLanguage=occ, into Swift with some tweaks. This should technically be "MetalAdder.playground" but GitHub doesn't seem to know what to make of the ".playground" extension. If you want to try it o…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import PlaygroundSupport | |
import MetalKit | |
// Apple's sample uses 1 << 24 elements, but filling two buffers of that size
// takes too long here, so a smaller element count is used instead.
//let arrayLength: Int = 1 << 24
let arrayLength = 1 << 20
// Byte size of one buffer holding `arrayLength` Float values.
let bufferSize = MemoryLayout<Float>.stride * arrayLength
/// Adds two arrays of `Float` values on the GPU using a Metal compute kernel
/// and checks the GPU output against the same sum computed on the CPU.
///
/// Typical use: create with a device, call `prepareData()`, then
/// `sendComputeCommand()`.
class MetalAdder {
    var device: MTLDevice

    // The compute pipeline generated from the `add_arrays` kernel compiled in `init`.
    var addFunctionPSO: MTLComputePipelineState

    // The command queue used to pass commands to the device.
    var commandQueue: MTLCommandQueue

    // Buffers to hold the two inputs and the result; nil until `prepareData()` runs.
    var mBufferA: MTLBuffer?
    var mBufferB: MTLBuffer?
    var mBufferResult: MTLBuffer?

    /// Compiles the `add_arrays` kernel from source and creates the pipeline
    /// state and command queue for `withDevice`.
    ///
    /// - Parameter withDevice: The Metal device that runs the computation.
    /// - Throws: Any error raised while compiling the shader source or
    ///   creating the compute pipeline state.
    init(withDevice: MTLDevice) throws {
        device = withDevice

        let shader = """
        #include <metal_stdlib>
        using namespace metal;
        /// This is a Metal Shading Language (MSL) function equivalent to the add_arrays() C function, used to perform the calculation on a GPU.
        kernel void add_arrays(device const float* inA,
                               device const float* inB,
                               device float* result,
                               uint index [[thread_position_in_grid]])
        {
            // the for-loop is replaced with a collection of threads, each of which
            // calls this function.
            result[index] = inA[index] + inB[index];
        }
        """

        let library = try withDevice.makeLibrary(source: shader, options: nil)
        guard let addFunction = library.makeFunction(name: "add_arrays") else {
            fatalError("Could not create the adder function")
        }

        // Create a compute pipeline state object.
        addFunctionPSO = try withDevice.makeComputePipelineState(function: addFunction)

        guard let queue = withDevice.makeCommandQueue() else {
            fatalError("Could not create command queue")
        }
        commandQueue = queue
    }

    /// Allocates the two input buffers and the result buffer (each
    /// `bufferSize` bytes, shared storage) and fills the inputs with random
    /// values.
    func prepareData() {
        guard
            let bufferA = device.makeBuffer(length: bufferSize, options: .storageModeShared),
            let bufferB = device.makeBuffer(length: bufferSize, options: .storageModeShared),
            let bufferResult = device.makeBuffer(length: bufferSize, options: .storageModeShared)
        else {
            fatalError("Could not allocate the data buffers")
        }

        generateRandomFloatData(buffer: bufferA)
        generateRandomFloatData(buffer: bufferB)

        mBufferA = bufferA
        mBufferB = bufferB
        mBufferResult = bufferResult
    }

    /// Encodes the add kernel, submits it, blocks until the GPU finishes,
    /// prints the elapsed time, and then verifies the results on the CPU.
    func sendComputeCommand() {
        // Create a command buffer to hold commands.
        guard let commandBuffer = commandQueue.makeCommandBuffer() else {
            fatalError("Could not create command buffer")
        }

        // Start a compute pass.
        guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
            fatalError("Could not create compute encoder")
        }

        // Encodes the dispatch and ends the compute pass.
        encodeAddCommand(computeEncoder: computeEncoder)

        // Start timing before the commit so the measurement covers the whole
        // submission, not just the wait that follows it.
        let startTime = CFAbsoluteTimeGetCurrent()

        // Execute the command.
        commandBuffer.commit()

        // Block until the calculation is complete.
        commandBuffer.waitUntilCompleted()

        let timeElapsed = CFAbsoluteTimeGetCurrent() - startTime
        print("Time elapsed for GPU calculation: \(timeElapsed) s.")

        // A failed command buffer leaves the result buffer meaningless, so
        // report the failure instead of verifying garbage.
        if let error = commandBuffer.error {
            print("GPU command buffer failed: \(error)")
            return
        }

        verifyResults()
    }

    /// Binds the pipeline state and the three buffers, dispatches one thread
    /// per array element, and ends encoding on `computeEncoder`.
    ///
    /// - Parameter computeEncoder: The encoder to record the compute pass into.
    func encodeAddCommand(computeEncoder: MTLComputeCommandEncoder) {
        // Encode the pipeline state object and its parameters.
        computeEncoder.setComputePipelineState(addFunctionPSO)
        computeEncoder.setBuffer(mBufferA, offset: 0, index: 0)
        computeEncoder.setBuffer(mBufferB, offset: 0, index: 1)
        computeEncoder.setBuffer(mBufferResult, offset: 0, index: 2)

        let gridSize = MTLSizeMake(arrayLength, 1, 1)

        // A threadgroup never needs more threads than there are elements.
        let threadsPerGroup = min(addFunctionPSO.maxTotalThreadsPerThreadgroup, arrayLength)
        let threadgroupSize = MTLSizeMake(threadsPerGroup, 1, 1)

        // Encode the compute command.
        computeEncoder.dispatchThreads(gridSize, threadsPerThreadgroup: threadgroupSize)

        // End the compute pass.
        computeEncoder.endEncoding()
    }

    /// Fills every `Float` slot of `buffer` with a random value in `0...1`.
    ///
    /// The element count is taken from the buffer's own length so the loop
    /// can never write past the end of the allocation.
    ///
    /// - Parameter buffer: The buffer to fill, treated as an array of `Float`.
    func generateRandomFloatData(buffer: MTLBuffer) {
        let elementCount = buffer.length / MemoryLayout<Float>.stride
        let dataPtr = buffer.contents().assumingMemoryBound(to: Float.self)

        for index in 0..<elementCount {
            dataPtr[index] = Float.random(in: 0...1)
        }
    }

    /// Recomputes every sum on the CPU and compares it with the GPU result.
    ///
    /// Each mismatch is printed and trips a debug assertion; the success
    /// message is printed only when every element matches.
    func verifyResults() {
        guard let bufferA = mBufferA, let bufferB = mBufferB, let bufferResult = mBufferResult else {
            fatalError("Buffers have not been allocated; call prepareData() first")
        }

        let a = bufferA.contents().assumingMemoryBound(to: Float.self)
        let b = bufferB.contents().assumingMemoryBound(to: Float.self)
        let result = bufferResult.contents().assumingMemoryBound(to: Float.self)

        var mismatchCount = 0
        let startTime = CFAbsoluteTimeGetCurrent()

        for index in 0..<arrayLength {
            let total = a[index] + b[index]
            if result[index] != total {
                mismatchCount += 1
                print("Compute error index=\(index) result=\(result[index]) \(total)=a+b")
                assertionFailure("GPU result does not match CPU result at index \(index)")
            }
        }

        let timeElapsed = CFAbsoluteTimeGetCurrent() - startTime
        print("Time elapsed for CPU calculation: \(timeElapsed) s.")

        if mismatchCount == 0 {
            print("Compute results as expected")
        } else {
            print("Compute results had \(mismatchCount) mismatched element(s)")
        }
    }
}
// Look up the system's default GPU; there is nothing to run on without one.
let device: MTLDevice = {
    guard let gpu = MTLCreateSystemDefaultDevice() else {
        fatalError("GPU is not supported")
    }
    return gpu
}()

// Build the adder, fill the input buffers, then run and verify the kernel.
let adder = try MetalAdder(withDevice: device)
adder.prepareData()
adder.sendComputeCommand()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment