Created
May 27, 2025 20:21
-
-
Save philipturner/7f2b3da4ae719bb28d3b60ebfc1e0f60 to your computer and use it in GitHub Desktop.
Fourth set of files saved for easy reference, while cleaning up an iteration of the Windows port of Molecular Renderer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Next steps: | |
// - Access the GPU. | |
// - Modify it to get Metal rendering. [DONE] | |
// - Clean up and simplify the code as much as possible. [DONE] | |
// - Get timestamps synchronizing properly (moving rainbow banner | |
// scene). [DONE] | |
// - Repeat the same process with COM / D3D12 on Windows. | |
// - Get some general experience with C++ DirectX sample code. | |
// - Modify the files one-by-one to support Windows. | |
import MolecularRenderer | |
#if os(macOS) | |
import Metal | |
@MainActor | |
func createApplication() -> Application { | |
// Set up the display. | |
var displayDesc = DisplayDescriptor() | |
displayDesc.renderTargetSize = 1920 | |
displayDesc.screenID = Display.fastestScreenID | |
let display = Display(descriptor: displayDesc) | |
// Set up the GPU context. | |
var gpuContextDesc = GPUContextDescriptor() | |
gpuContextDesc.deviceID = GPUContext.fastestDeviceID | |
let gpuContext = GPUContext(descriptor: gpuContextDesc) | |
// Set up the application. | |
var applicationDesc = ApplicationDescriptor() | |
applicationDesc.display = display | |
applicationDesc.gpuContext = gpuContext | |
let application = Application(descriptor: applicationDesc) | |
return application | |
} | |
func createShaderSource() -> String { | |
""" | |
#include <metal_stdlib> | |
using namespace metal; | |
half convertToChannel( | |
half hue, | |
half saturation, | |
half lightness, | |
ushort n | |
) { | |
half k = half(n) + hue / 30; | |
k -= 12 * floor(k / 12); | |
half a = saturation; | |
a *= min(lightness, 1 - lightness); | |
half output = min(k - 3, 9 - k); | |
output = max(output, half(-1)); | |
output = min(output, half(1)); | |
output = lightness - a * output; | |
return output; | |
} | |
kernel void renderImage( | |
constant float *time0 [[buffer(0)]], | |
constant float *time1 [[buffer(1)]], | |
constant float *time2 [[buffer(2)]], | |
texture2d<half, access::write> drawableTexture [[texture(0)]], | |
ushort2 tid [[thread_position_in_grid]] | |
) { | |
half4 color; | |
if (tid.y < 1600) { | |
color = half4(0.707, 0.707, 0.00, 1.00); | |
} else { | |
float progress = float(tid.x) / 1920; | |
if (tid.y < 1600 + 107) { | |
progress += *time0; | |
} else if (tid.y < 1600 + 213) { | |
progress += *time1; | |
} else { | |
progress += *time2; | |
} | |
half hue = half(progress) * 360; | |
half saturation = 1.0; | |
half lightness = 0.5; | |
half red = convertToChannel(hue, saturation, lightness, 0); | |
half green = convertToChannel(hue, saturation, lightness, 8); | |
half blue = convertToChannel(hue, saturation, lightness, 4); | |
color = half4(red, green, blue, 1.00); | |
} | |
drawableTexture.write(color, tid); | |
} | |
""" | |
} | |
func createRenderPipeline( | |
application: Application, | |
shaderSource: String | |
) -> MTLComputePipelineState { | |
let device = application.gpuContext.device | |
let shaderSource = createShaderSource() | |
let library = try! device.makeLibrary(source: shaderSource, options: nil) | |
let function = library.makeFunction(name: "renderImage") | |
guard let function else { | |
fatalError("Could not make function.") | |
} | |
let pipeline = try! device.makeComputePipelineState(function: function) | |
return pipeline | |
} | |
// Set up the resources. | |
let application = createApplication() | |
let shaderSource = createShaderSource() | |
let renderPipeline = createRenderPipeline( | |
application: application, | |
shaderSource: shaderSource) | |
var startTime: UInt64? | |
var frameID: Int = .zero | |
// Enter the run loop. | |
application.run { renderTarget in | |
frameID += 1 | |
// Start the command encoder. | |
let commandQueue = application.gpuContext.commandQueue | |
let commandBuffer = commandQueue.makeCommandBuffer()! | |
let encoder = commandBuffer.makeComputeCommandEncoder()! | |
// Bind the buffers. | |
do { | |
func setTime(_ time: Double, index: Int) { | |
let fractionalTime = time - floor(time) | |
var time32 = Float(fractionalTime) | |
encoder.setBytes(&time32, length: 4, index: index) | |
} | |
if let startTime { | |
let currentTime = mach_continuous_time() | |
let timeSeconds = Double(currentTime - startTime) / 24_000_000 | |
setTime(timeSeconds, index: 0) | |
} else { | |
startTime = mach_continuous_time() | |
setTime(Double.zero, index: 0) | |
} | |
let clock = application.clock | |
let timeInFrames = clock.frames | |
let framesPerSecond = application.display.frameRate | |
let timeInSeconds = Double(timeInFrames) / Double(framesPerSecond) | |
setTime(timeInSeconds, index: 1) | |
setTime(Double.zero, index: 2) | |
} | |
// Bind the textures. | |
encoder.setTexture(renderTarget, index: 0) | |
// Dispatch | |
do { | |
encoder.setComputePipelineState(renderPipeline) | |
let width = Int(renderTarget.width) | |
let height = Int(renderTarget.height) | |
encoder.dispatchThreads( | |
MTLSize(width: width, height: height, depth: 1), | |
threadsPerThreadgroup: MTLSize(width: 8, height: 8, depth: 1)) | |
} | |
// End the command encoder. | |
encoder.endEncoding() | |
commandBuffer.commit() | |
} | |
#endif | |
#if os(Windows) | |
import SwiftCOM | |
import WinSDK | |
// Objectives: | |
// (1) Integrate the debug layer into device initialization. Set it to break | |
// only on 'ERROR'. | |
// (2) Integrate state tracking into Buffer. | |
// (3) Create an instance member of Buffer called 'transition', which returns | |
// a DirectX resource barrier value type. | |
// (4) Reproduce the previous code for copy commands, and verify that the debug | |
// layer is working as expected. | |
#if false | |
let vectorAddition = VectorAddition(device: device) | |
let commandQueue = CommandQueue(device: device) | |
let commandList = commandQueue.createCommandList() | |
// Copy command: inputBuffer0 -> nativeBuffer0 | |
do { | |
let barrier = vectorAddition.nativeBuffer0 | |
.transition(state: D3D12_RESOURCE_STATE_COPY_DEST) | |
let barriers = [barrier] | |
try! commandList.ResourceBarrier( | |
UInt32(barriers.count), barriers) | |
try! commandList.CopyResource( | |
vectorAddition.nativeBuffer0.d3d12Resource, | |
vectorAddition.inputBuffer0.d3d12Resource) | |
} | |
print("Encoded command 1 successfully.") | |
// Copy command: inputBuffer1 -> nativeBuffer1 | |
do { | |
let barrier = vectorAddition.nativeBuffer1 | |
.transition(state: D3D12_RESOURCE_STATE_COPY_DEST) | |
let barriers = [barrier] | |
try! commandList.ResourceBarrier( | |
UInt32(barriers.count), barriers) | |
try! commandList.CopyResource( | |
vectorAddition.nativeBuffer1.d3d12Resource, | |
vectorAddition.inputBuffer1.d3d12Resource) | |
} | |
print("Encoded command 2 successfully.") | |
// Copy command: nativeBuffer0 -> nativeBuffer2 | |
do { | |
let barrier0 = vectorAddition.nativeBuffer0 | |
.transition(state: D3D12_RESOURCE_STATE_COPY_SOURCE) | |
let barrier2 = vectorAddition.nativeBuffer2 | |
.transition(state: D3D12_RESOURCE_STATE_COPY_DEST) | |
let barriers = [barrier0, barrier2] | |
try! commandList.ResourceBarrier( | |
UInt32(barriers.count), barriers) | |
try! commandList.CopyResource( | |
vectorAddition.nativeBuffer2.d3d12Resource, | |
vectorAddition.nativeBuffer0.d3d12Resource) | |
} | |
print("Encoded command 3 successfully.") | |
// Copy command: nativeBuffer2 -> outputBuffer2 | |
do { | |
let barrier = vectorAddition.nativeBuffer2 | |
.transition(state: D3D12_RESOURCE_STATE_COPY_SOURCE) | |
let barriers = [barrier] | |
try! commandList.ResourceBarrier( | |
UInt32(barriers.count), barriers) | |
try! commandList.CopyResource( | |
vectorAddition.outputBuffer2.d3d12Resource, | |
vectorAddition.nativeBuffer2.d3d12Resource) | |
} | |
print("Encoded command 4 successfully.") | |
commandQueue.commit(commandList) | |
commandQueue.flush() | |
print("The commands completed on the GPU.") | |
// Check the data in the output buffer. | |
do { | |
var outputData2: [Float] = [] | |
for i in 0..<1024 { | |
outputData2.append(0) | |
} | |
outputData2.withUnsafeMutableBytes { bufferPointer in | |
let baseAddress = bufferPointer.baseAddress! | |
vectorAddition.outputBuffer2 | |
.read(output: baseAddress) | |
} | |
for slotID in 0..<10 { | |
let value2 = outputData2[slotID] | |
print("outputBuffer[\(slotID)] = \(value2)") | |
} | |
} | |
#endif | |
// On to the next task. Before doing it, remind myself: | |
// - What is the ultimate goal? | |
// - What is the next step after this one, toward the ultimate goal? | |
// | |
// Task 1: How do I "bind buffers" to the root signature? | |
// Task 2: What are the UAV barriers and should resources explicitly transition | |
// to the UAV state? | |
// ## Task 1 | |
// | |
// The root signature is specified in HLSL. It declares that: | |
// - The buffer at location 'u0' is UAV. | |
// - The buffer at location 'u1' is UAV. | |
// - The buffer at location 'u2' is UAV. | |
// | |
// Here is how the root signature and PSO are initialized: | |
// - Compile the shader source code. | |
// - Extract the root signature and shader blobs. | |
// - Call 'ID3D12Device.CreateRootSignature' on the root signature blob. | |
// - Fill a PSO descriptor with: | |
// - The shader blob. | |
// - The root signature object. | |
// - An absence of a cached pipeline state. | |
// - Call 'ID3D12Device.CreatePipelineState' on the descriptor. | |
// | |
// Questions: | |
// - How do I bind the buffers to the command list? | |
// - How do I bind the pipeline state? | |
// - Must the root signature be explicitly bound as well? | |
// | |
// Start the search by reading through the 3DGEP tutorials and reference code. | |
// Then, finish off the search by reading Microsoft's online documentation. | |
// | |
// D3D12_VERTEX_BUFFER_VIEW exists. | |
// D3D12_INDEX_BUFFER_VIEW exists. | |
// D3D12_DEPTH_STENCIL_VIEW_DESC & device->CreateDepthStencilView, m_DSVHeap | |
// relies on CPU descriptor handle. | |
// commandList->ClearDepthStencilView(D3D12_CPU_DESCRIPTOR_HANDLE dsv) | |
// | |
// commandList->SetPipelineState(...) | |
// commandList->SetGraphicsRootSignature(...) | |
// commandList->IASetVertexBuffers(m_VertexBufferView) | |
// commandList->IASetIndexBuffer(m_IndexBufferView) | |
// commandList->DrawIndexedInstanced(...) | |
// | |
// SetGraphicsDynamicConstantBuffer(uint32_t index, void* data) | |
// allocate space in UploadBuffer "heap" | |
// copy data from pointer to D3D resource | |
// ID3D12GraphicsCommandList::SetGraphicsRootConstantBufferView | |
// using root parameter index and GPU virtual address | |
// | |
// SetShaderResourceView | |
// just uses the dynamic descriptor heap helper | |
// resource must be under a 'DESCRIPTOR_TABLE' root parameter | |
// | |
// CommitStagedDescriptorsForDraw | |
// commandList.SetDescriptorHeap(...) | |
// device->CopyDescriptors(...) | |
// | |
// CopyDescriptor | |
// commandList.SetDescriptorHeap(...) | |
// device->CopyDescriptorsSimple(CPU handle, CPU descriptor) | |
// | |
// Additional notes upon inspecting CommandList.cpp: | |
// ID3D12GraphicsCommandList::SetPipelineState(pipelineState.Get()) | |
// ID3D12GraphicsCommandList::SetComputeRootSignature(m_RootSignature) | |
// SetUnorderedAccessView just does the descriptor table stuff, not what I'm | |
// looking for. | |
// ID3D12GraphicsCommandList::Dispatch(numGroupsX, numGroupsY, numGroupsZ) | |
// ID3D12GraphicsCommandList::SetDescriptorHeaps(uint32_t count, void *heaps) | |
// | |
// Riccardo Loggini compute shaders tutorial: | |
// - Binds a texture to a descriptor table | |
// - Set the "descriptor heaps" (sic) of a command list | |
// - Creates a UAV desc on the CPU side for the texture | |
// - Encodes a barrier to transition the texture to UAV | |
// - Fetches a CPU descriptor handle from a heap, presumably for the UAV | |
// | |
// device->CreateUnorderedAccessView(myTexture.Get(), nullptr, uavDesc, | |
// myHeapUavDescriptor.GetDescriptorHandle()); | |
// copies the descriptor between two CPU handles | |
// m_d3d12CommandList->SetComputeRootDescriptorTable(0, GPU descriptor handle) | |
// I still need more information about resource binding. Check Microsoft's | |
// online documentation. | |
// | |
// ID3D12GraphicsCommandList::SetPipelineState | |
// - programs most of the fixed-function state of the GPU pipeline | |
// ID3D12GraphicsCommandList::SetComputeRootSignature | |
// - sets the layout of the compute root signaturer | |
// ID3D12GraphicsCommandList::ResourceBarrier | |
// ID3D12GraphicsCommandList::SetComputeRootUnorderedAccessView | |
// ID3D12GraphicsCommandList::SetComputeRootUnorderedAccessView | |
// ID3D12GraphicsCommandList::SetComputeRootUnorderedAccessView | |
// - sets a CPU descriptor handle for the UAV resource in the root signature | |
// ID3D12GraphicsCommandList::Dispatch | |
// - Microsoft documentation looks wrong / misworded | |
// ID3D12GraphicsCommandList::Close | |
// | |
// Functions for creating the GPU virtual address / CPU descriptor handle... | |
// https://learn.microsoft.com/en-us/windows/win32/direct3d12/using-descriptors-directly-in-the-root-signature | |
// | |
// Looks like it really is as easy as specifying the GPU address. Let's see if | |
// we can do that and bypass the DirectX debug layer that demands a resource | |
// state transition. | |
// Before that, clean up the process of initializing a shader. There's | |
// boilerplate code in the previous reference that ought to go into a utility | |
// class. | |
// TODO: Change ShaderBytecode to ShaderDescriptor, but make the descriptor | |
// and the Shader initializer internal. Change 'Compiler' to just | |
// 'DirectXDevice', and put 'compile' in an 'extension'. Keep all of that, as | |
// well as the 'dxcompiler_compile' reference, in the same file as 'Shader'. | |
// | |
// And finally, change DirectXDevice to just Device. This brings it closer to | |
// merging with the Metal backend in the future. | |
// | |
// Remove '.compile' and just make it a public API of ShaderDescriptor? There | |
// is something interesting about one-line functions to initialize an object. | |
// It creates less code on the front-end. But from an API design standpoint, | |
// it would be more consistent to have everything made with a descriptor. | |
// | |
// What is the paradigm for MM4? | |
// - MM4ForceField.init(descriptor:) | |
// - MM4Parameters.init(descriptor:) | |
// - MM4RigidBody.init(descriptor:) | |
// | |
// It's valid to have a descriptor where the user sets one or no options. | |
// | |
// How about this rule: | |
// - If only a single variable is required, and it's obvious, the initializer | |
// uses that. I had several nested functions in code that just required the | |
// 'device'. | |
// - The code for macOS just uses this rule in internal initializers. | |
// - Public initializers should always use a descriptor. | |
// - If two or more variables are required, always use a descriptor. Even if | |
// 'compile' looked familiar in previous code. Based on judgment, sometimes | |
// use a descriptor if there's just one variable. | |
// | |
// Task 1: Fix the existing code in the helpers. | |
// Task 2: Augment the 'Shader' class, making the blobs transient and instead | |
// exposing DirectX API objects to the public API. | |
// | |
// Remember: After all of this "Hello world" compute stuff is done, the next | |
// goal will be to merge the DirectX and Metal helper classes. At least for | |
// GPU compute work. After that's done, we can take steps to incorporate UI | |
// or app launching code on Windows. | |
// - The Metal and DirectX backends will still diverge on lack of support for | |
// textures. Thus, the next task for the DirectX backend is to render to a | |
// texture on the screen. Once this is done, we can start to merge the | |
// backends. | |
// - The Metal and DirectX backends differ in how threadgroup sizes are | |
// specified. Make the threadgroup size specified as part of the shader. | |
// Then, the Metal backend can reference it when ready to execute a command. | |
// MARK: - Initialize Most of the Resources | |
let device = Device() | |
print(device) | |
var commandQueueDesc = CommandQueueDescriptor() | |
commandQueueDesc.device = device | |
let commandQueue = CommandQueue(descriptor: commandQueueDesc) | |
print(commandQueue) | |
let vectorAddition = VectorAddition(device: device) | |
print(vectorAddition) | |
// MARK: - Create the Shader | |
func createShaderSource() -> String { | |
""" | |
RWStructuredBuffer<float> buffer0 : register(u0); | |
RWStructuredBuffer<float> buffer1 : register(u1); | |
RWStructuredBuffer<float> buffer2 : register(u2); | |
#define mainRS "UAV(u0), " \\ | |
"UAV(u1), " \\ | |
"UAV(u2)" | |
[numthreads(128, 1, 1)] | |
[RootSignature(mainRS)] | |
void main( | |
uint3 tid : SV_DispatchThreadID | |
) { | |
uint slotID = tid.x; | |
float input0 = buffer0[slotID]; | |
float input1 = buffer1[slotID]; | |
float output = input1 / input0; | |
buffer2[slotID] = output; | |
} | |
""" | |
} | |
var shaderDesc = ShaderDescriptor() | |
shaderDesc.device = device | |
shaderDesc.source = createShaderSource() | |
let shader = Shader(descriptor: shaderDesc) | |
print(shader.d3d12PipelineState) | |
print(shader.d3d12RootSignature) | |
let commandList = commandQueue.createCommandList() | |
// MARK: - Copy Commands (CPU -> GPU) | |
// Copy command: inputBuffer0 -> nativeBuffer0 | |
do { | |
let barrier = vectorAddition.nativeBuffer0 | |
.transition(state: D3D12_RESOURCE_STATE_COPY_DEST) | |
let barriers = [barrier] | |
try! commandList.ResourceBarrier( | |
UInt32(barriers.count), barriers) | |
try! commandList.CopyResource( | |
vectorAddition.nativeBuffer0.d3d12Resource, | |
vectorAddition.inputBuffer0.d3d12Resource) | |
} | |
print("Encoded command 1 successfully.") | |
// Copy command: inputBuffer1 -> nativeBuffer1 | |
do { | |
let barrier = vectorAddition.nativeBuffer1 | |
.transition(state: D3D12_RESOURCE_STATE_COPY_DEST) | |
let barriers = [barrier] | |
try! commandList.ResourceBarrier( | |
UInt32(barriers.count), barriers) | |
try! commandList.CopyResource( | |
vectorAddition.nativeBuffer1.d3d12Resource, | |
vectorAddition.inputBuffer1.d3d12Resource) | |
} | |
print("Encoded command 2 successfully.") | |
// MARK: - Compute Command | |
try! commandList.SetPipelineState(shader.d3d12PipelineState) | |
try! commandList.SetComputeRootSignature(shader.d3d12RootSignature) | |
print("Set the pipeline and root signature.") | |
// Encode all of the barriers. | |
do { | |
let barrier0 = vectorAddition.nativeBuffer0 | |
.transition(state: D3D12_RESOURCE_STATE_UNORDERED_ACCESS) | |
let barrier1 = vectorAddition.nativeBuffer1 | |
.transition(state: D3D12_RESOURCE_STATE_UNORDERED_ACCESS) | |
let barrier2 = vectorAddition.nativeBuffer2 | |
.transition(state: D3D12_RESOURCE_STATE_UNORDERED_ACCESS) | |
// The final barrier is a UAV barrier. I think you only need this between | |
// two compute commands, not before a string of them. It effectively | |
// marks every resource as UAV sensitive, flushing the cache before the | |
// command executes. We include it here to get experience with encoding | |
// UAV barriers. | |
var uavBarrier = D3D12_RESOURCE_BARRIER() | |
uavBarrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV | |
uavBarrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE | |
uavBarrier.UAV.pResource = nil | |
let barriers = [barrier0, barrier1, barrier2, uavBarrier] | |
try! commandList.ResourceBarrier( | |
UInt32(barriers.count), barriers) | |
} | |
print("Set the resource barriers.") | |
// Bind the buffers to the buffer table. | |
do { | |
func setUAV(index: UInt32, buffer: Buffer) { | |
let d3d12Resource = buffer.d3d12Resource | |
let gpuAddress = try! d3d12Resource.GetGPUVirtualAddress() | |
try! commandList | |
.SetComputeRootUnorderedAccessView(index, gpuAddress) | |
} | |
setUAV(index: 0, buffer: vectorAddition.nativeBuffer0) | |
setUAV(index: 1, buffer: vectorAddition.nativeBuffer1) | |
setUAV(index: 2, buffer: vectorAddition.nativeBuffer2) | |
} | |
print("Set the buffer bindings.") | |
// Dispatch 1024 threads, in 8 groups of 128. | |
try! commandList.Dispatch(8, 1, 1) | |
print("Encoded the kernel invocation.") | |
// MARK: - Copy Commands (GPU -> CPU) | |
// Copy command: nativeBuffer2 -> outputBuffer2 | |
do { | |
let barrier = vectorAddition.nativeBuffer2 | |
.transition(state: D3D12_RESOURCE_STATE_COPY_SOURCE) | |
let barriers = [barrier] | |
try! commandList.ResourceBarrier( | |
UInt32(barriers.count), barriers) | |
try! commandList.CopyResource( | |
vectorAddition.outputBuffer2.d3d12Resource, | |
vectorAddition.nativeBuffer2.d3d12Resource) | |
} | |
print("Encoded command 4 successfully.") | |
commandQueue.commit(commandList) | |
print("The commands were submitted.") | |
commandQueue.flush() | |
print("The commands finished executing.") | |
// MARK: - Checking Results | |
// Check the data in the output buffer. | |
do { | |
var outputData2: [Float] = [] | |
for _ in 0..<1024 { | |
outputData2.append(0) | |
} | |
outputData2.withUnsafeMutableBytes { bufferPointer in | |
let baseAddress = bufferPointer.baseAddress! | |
let outputBuffer2 = vectorAddition.outputBuffer2 | |
outputBuffer2.read(output: baseAddress) | |
} | |
for slotID in 0..<10 { | |
let value2 = outputData2[slotID] | |
print("outputBuffer[\(slotID)] = \(value2)") | |
} | |
} | |
// The "hello world" compute demo works! Next, render an image to the screen | |
// using only compute shaders. | |
// | |
// First research question: can you create a texture that's backed by a buffer? | |
// Is the drawable for rendering backed by a buffer? If not, each texture | |
// should own a unique descriptor table, encapsulated in the utility 'Texture'. | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment