Skip to content

Instantly share code, notes, and snippets.

@skooch
Last active April 28, 2026 08:58
Show Gist options
  • Select an option

  • Save skooch/6f1b756afdccc66e9e122fb95cf9d79d to your computer and use it in GitHub Desktop.

Select an option

Save skooch/6f1b756afdccc66e9e122fb95cf9d79d to your computer and use it in GitHub Desktop.
MoltenVK #2220 — allowGPUOptimizedContents predicate probe (companion to PR fix)

MoltenVK #2220 — allowGPUOptimizedContents predicate probe

Companion artifact for KhronosGroup/MoltenVK#2220.

The probe creates a VkImage for each combination of (format, usage, create-flags) in a small matrix and reads back the resulting MTLTexture.allowGPUOptimizedContents (and MTLTexture.usage) via VK_EXT_metal_objects. It exists to make the impact of a candidate predicate change in MVKImagePlane::newMTLTextureDescriptor() visible without needing to reproduce the end-to-end channel corruption (which only manifests inside specific game render pipelines).

Build

# Point MVK at any MoltenVK source tree that has been built (Package/Debug/MoltenVK or Package/Release/MoltenVK)
MVK=/path/to/MoltenVK/Package/Debug/MoltenVK
VKH="$MVK/../../../External/Vulkan-Headers/include"   # repo-root External/, or any Vulkan-Headers tree

clang++ -std=c++17 -Wall -O0 -g -fobjc-arc -I "$VKH" usage_probe.mm \
  -L "$MVK/dynamic/dylib/macOS" -lMoltenVK -Wl,-rpath,"$MVK/dynamic/dylib/macOS" \
  -framework Foundation -framework Metal -framework IOSurface \
  -framework CoreGraphics -framework QuartzCore -framework IOKit \
  -o usage_probe

./usage_probe

What the output means

For each row, "fl" is VkImageCreateInfo::flags in hex, "vk usage" is what the Vulkan app requested in VkImageCreateInfo::usage, "mtl usage" is the MTLTextureUsage that MoltenVK set on the underlying MTLTexture, and "allowGPUOpt" is MTLTexture.allowGPUOptimizedContents. On Apple Silicon, setting allowGPUOptimizedContents = NO is the documented Metal mechanism for opting a texture out of lossless framebuffer compression (see Apple's "Optimizing texture data" documentation).

A correct predicate change should:

  • flip allowGPUOpt to NO on the #2220 RT+SAMPLED+SRC and PostProc RT+SAMPLED+SRC+DST rows (color render targets used as transfer sources)
  • leave allowGPUOpt = YES on the Game asset SAMPLED+DST+SRC row (this is the population PR #2088 was specifically protecting — compression must be preserved for sampled asset textures with TRANSFER_SRC set for mip generation)
  • not set MTLTextureUsagePixelFormatView on any row (the previous candidate fix used PixelFormatView as a side-effect; the current fix uses the explicit Metal API instead)
// MoltenVK #2220 probe: create VkImages with many usage/flag combinations
// and report whether each resulting MTLTexture has lossless compression
// disabled (allowGPUOptimizedContents = NO) and what its MTLTextureUsage is.
//
// On Apple Silicon, allowGPUOptimizedContents = NO is the documented Metal
// API for opting a texture out of lossless framebuffer compression. This
// probe makes the impact of a predicate change in MVKImagePlane visible
// without needing to reproduce the end-to-end channel corruption.
//
// Uses VK_EXT_metal_objects / vkExportMetalObjectsEXT to get the Metal
// texture. Links directly to libMoltenVK.dylib.
#import <Metal/Metal.h>
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_metal.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>
#include <string>
// Evaluates a Vulkan call exactly once. If the result is anything other than
// VK_SUCCESS, prints the failing expression text, the numeric result and the
// source location to stderr, then terminates the process with exit code 1.
#define VK_CHECK(expr)                                                   \
    do {                                                                 \
        VkResult vkCheckResult_ = (expr);                                \
        if (vkCheckResult_ != VK_SUCCESS) {                              \
            std::fprintf(stderr, "%s failed: %d at %s:%d\n", #expr,      \
                         vkCheckResult_, __FILE__, __LINE__);            \
            std::exit(1);                                                \
        }                                                                \
    } while (0)
// Selects a memory type index for an allocation.
//   pd   - physical device whose memory types are queried.
//   bits - mask of permitted memory type indices (bit i set => type i allowed),
//          e.g. VkMemoryRequirements::memoryTypeBits.
//   req  - property flags the chosen type should provide.
// Returns the lowest permitted index whose propertyFlags contain every bit of
// `req`. If no permitted type has those properties, falls back to the lowest
// permitted index regardless of properties, instead of blindly returning 0
// (which may not be permitted by `bits`). Returns 0 only when `bits` selects
// none of the reported memory types.
static uint32_t findMemType(VkPhysicalDevice pd, uint32_t bits, VkMemoryPropertyFlags req) {
    VkPhysicalDeviceMemoryProperties mp;
    vkGetPhysicalDeviceMemoryProperties(pd, &mp);

    uint32_t fallback = 0;
    bool haveFallback = false;
    for (uint32_t i = 0; i < mp.memoryTypeCount; ++i) {
        if (!(bits & (1u << i))) continue;  // type i not permitted for this resource
        if ((mp.memoryTypes[i].propertyFlags & req) == req) return i;
        if (!haveFallback) {
            // Remember the first permitted type in case nothing satisfies `req`.
            fallback = i;
            haveFallback = true;
        }
    }
    return fallback;
}
// Renders a VkImageUsageFlags mask as a '|'-separated list of short names
// (SRC, DST, SAMPLED, STORAGE, COLOR, DS, INPUT, TRANSIENT), in that fixed
// order. Bits outside this list are ignored; if none of the listed bits is
// set the result is "0".
static std::string usageStr(VkImageUsageFlags u) {
    struct UsageName {
        VkImageUsageFlags bit;
        const char* name;
    };
    static const UsageName kUsageNames[] = {
        {VK_IMAGE_USAGE_TRANSFER_SRC_BIT, "SRC"},
        {VK_IMAGE_USAGE_TRANSFER_DST_BIT, "DST"},
        {VK_IMAGE_USAGE_SAMPLED_BIT, "SAMPLED"},
        {VK_IMAGE_USAGE_STORAGE_BIT, "STORAGE"},
        {VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, "COLOR"},
        {VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, "DS"},
        {VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, "INPUT"},
        {VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT, "TRANSIENT"},
    };

    std::string joined;
    for (const UsageName& entry : kUsageNames) {
        if ((u & entry.bit) == 0) continue;
        if (!joined.empty()) joined += "|";
        joined += entry.name;
    }
    if (joined.empty()) return std::string("0");
    return joined;
}
// Renders an MTLTextureUsage mask as a '|'-separated list of names
// (ShaderRead, ShaderWrite, RenderTarget, PixelFormatView), in that order.
// A zero mask yields "Unknown"; a non-zero mask containing none of the listed
// bits yields "0".
static std::string mtlUsageStr(MTLTextureUsage u) {
    if (u == 0) return "Unknown";

    std::string joined;
    // Appends `label` (with a separator when needed) if `mask` is present in u.
    auto appendIfSet = [&](MTLTextureUsage mask, const char* label) {
        if (!(u & mask)) return;
        if (!joined.empty()) joined += "|";
        joined += label;
    };
    appendIfSet(MTLTextureUsageShaderRead, "ShaderRead");
    appendIfSet(MTLTextureUsageShaderWrite, "ShaderWrite");
    appendIfSet(MTLTextureUsageRenderTarget, "RenderTarget");
    appendIfSet(MTLTextureUsagePixelFormatView, "PixelFormatView");

    if (joined.empty()) return std::string("0");
    return joined;
}
// Maps the VkFormat values this probe knows about to a short display name.
// Any other format yields "?".
static const char* fmtStr(VkFormat f) {
    struct FormatName {
        VkFormat fmt;
        const char* name;
    };
    static const FormatName kFormatNames[] = {
        {VK_FORMAT_R16G16B16A16_SFLOAT, "RGBA16F"},
        {VK_FORMAT_R8G8B8A8_UNORM, "RGBA8"},
        {VK_FORMAT_B8G8R8A8_UNORM, "BGRA8"},
        {VK_FORMAT_R32G32B32A32_SFLOAT, "RGBA32F"},
        {VK_FORMAT_A2B10G10R10_UNORM_PACK32, "RGB10A2"},
    };
    for (const FormatName& entry : kFormatNames) {
        if (entry.fmt == f) return entry.name;
    }
    return "?";
}
int main() {
VkApplicationInfo ai{VK_STRUCTURE_TYPE_APPLICATION_INFO};
ai.apiVersion = VK_API_VERSION_1_2;
VkInstanceCreateInfo ici{VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO};
ici.pApplicationInfo = &ai;
VkInstance inst;
VK_CHECK(vkCreateInstance(&ici, nullptr, &inst));
uint32_t pdCnt = 0;
vkEnumeratePhysicalDevices(inst, &pdCnt, nullptr);
std::vector<VkPhysicalDevice> pds(pdCnt);
vkEnumeratePhysicalDevices(inst, &pdCnt, pds.data());
VkPhysicalDevice pd = pds[0];
VkPhysicalDeviceProperties props;
vkGetPhysicalDeviceProperties(pd, &props);
std::printf("Device: %s\n\n", props.deviceName);
uint32_t qfc = 0;
vkGetPhysicalDeviceQueueFamilyProperties(pd, &qfc, nullptr);
std::vector<VkQueueFamilyProperties> qfp(qfc);
vkGetPhysicalDeviceQueueFamilyProperties(pd, &qfc, qfp.data());
uint32_t qf = ~0u;
for (uint32_t i = 0; i < qfc; ++i) {
if (qfp[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { qf = i; break; }
}
float prio = 1.0f;
VkDeviceQueueCreateInfo qci{VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO};
qci.queueFamilyIndex = qf;
qci.queueCount = 1;
qci.pQueuePriorities = &prio;
const char* devExts[] = { "VK_EXT_metal_objects" };
VkDeviceCreateInfo dci{VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO};
dci.queueCreateInfoCount = 1;
dci.pQueueCreateInfos = &qci;
dci.enabledExtensionCount = 1;
dci.ppEnabledExtensionNames = devExts;
VkDevice dev;
VK_CHECK(vkCreateDevice(pd, &dci, nullptr, &dev));
auto vkExportMetalObjectsEXT_fn =
(PFN_vkExportMetalObjectsEXT)vkGetDeviceProcAddr(dev, "vkExportMetalObjectsEXT");
if (!vkExportMetalObjectsEXT_fn) {
std::fprintf(stderr, "vkExportMetalObjectsEXT not found (extension not enabled?)\n");
return 1;
}
// Build a probe case: (format, usage, flags, label).
struct Case { VkFormat fmt; VkImageUsageFlags usage; VkImageCreateFlags flags; const char* label; };
std::vector<Case> cases;
// A set of plausible usage combinations, centered on the #2220 pattern.
VkImageUsageFlags base_RT_SAMPLED_SRC =
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
VkImageUsageFlags base_GameAsset =
VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT; // some engines add SRC for mip gen
VkImageUsageFlags base_PostProc =
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT;
VkImageUsageFlags base_RTOnly =
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT;
VkImageUsageFlags base_Swapchainish =
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
VkFormat fmts[] = {
VK_FORMAT_R16G16B16A16_SFLOAT,
VK_FORMAT_R8G8B8A8_UNORM,
VK_FORMAT_B8G8R8A8_UNORM,
VK_FORMAT_R32G32B32A32_SFLOAT,
};
for (VkFormat f : fmts) {
cases.push_back({f, base_RT_SAMPLED_SRC, 0, "#2220 RT+SAMPLED+SRC"});
cases.push_back({f, base_RT_SAMPLED_SRC, VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, "#2220 RT+SAMPLED+SRC + MUTABLE"});
cases.push_back({f, base_PostProc, 0, "PostProc RT+SAMPLED+SRC+DST"});
cases.push_back({f, base_GameAsset, 0, "Game asset SAMPLED+DST+SRC"});
cases.push_back({f, VK_IMAGE_USAGE_SAMPLED_BIT|VK_IMAGE_USAGE_TRANSFER_DST_BIT, 0, "Game asset SAMPLED+DST"});
cases.push_back({f, base_RTOnly, 0, "RT+SAMPLED (no SRC)"});
cases.push_back({f, base_Swapchainish, 0, "Swapchain-ish RT+SRC"});
cases.push_back({f, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 0, "RT only"});
}
std::printf("%-10s %-32s %s %-36s %-25s %s\n",
"format", "case", "fl", "vk usage", "mtl usage", "allowGPUOpt");
std::printf("---------- -------------------------------- -- ------------------------------------ ------------------------- -----------\n");
for (const Case& c : cases) {
VkExportMetalObjectCreateInfoEXT expCI{VK_STRUCTURE_TYPE_EXPORT_METAL_OBJECT_CREATE_INFO_EXT};
expCI.exportObjectType = VK_EXPORT_METAL_OBJECT_TYPE_METAL_TEXTURE_BIT_EXT;
VkImageCreateInfo ici{VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
ici.pNext = &expCI;
ici.imageType = VK_IMAGE_TYPE_2D;
ici.format = c.fmt;
ici.extent = {256, 256, 1};
ici.mipLevels = 1;
ici.arrayLayers = 1;
ici.samples = VK_SAMPLE_COUNT_1_BIT;
ici.tiling = VK_IMAGE_TILING_OPTIMAL;
ici.usage = c.usage;
ici.flags = c.flags;
VkImage img;
VkResult r = vkCreateImage(dev, &ici, nullptr, &img);
if (r != VK_SUCCESS) {
std::printf("%-10s %-32s %02x %-36s vkCreateImage=%d\n",
fmtStr(c.fmt), c.label, c.flags, usageStr(c.usage).c_str(), r);
continue;
}
VkMemoryRequirements mr;
vkGetImageMemoryRequirements(dev, img, &mr);
VkMemoryAllocateInfo mai{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
mai.allocationSize = mr.size;
mai.memoryTypeIndex = findMemType(pd, mr.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
VkDeviceMemory mem;
VK_CHECK(vkAllocateMemory(dev, &mai, nullptr, &mem));
VK_CHECK(vkBindImageMemory(dev, img, mem, 0));
VkExportMetalTextureInfoEXT tinfo{VK_STRUCTURE_TYPE_EXPORT_METAL_TEXTURE_INFO_EXT};
tinfo.image = img;
tinfo.plane = VK_IMAGE_ASPECT_PLANE_0_BIT;
VkExportMetalObjectsInfoEXT oinfo{VK_STRUCTURE_TYPE_EXPORT_METAL_OBJECTS_INFO_EXT};
oinfo.pNext = &tinfo;
vkExportMetalObjectsEXT_fn(dev, &oinfo);
id<MTLTexture> tex = tinfo.mtlTexture;
MTLTextureUsage mu = tex ? tex.usage : (MTLTextureUsage)0;
const char* allowGPUOpt = tex ? (tex.allowGPUOptimizedContents ? "YES" : "NO") : "?";
std::printf("%-10s %-32s %02x %-36s %-25s %s\n",
fmtStr(c.fmt), c.label, c.flags, usageStr(c.usage).c_str(),
mtlUsageStr(mu).c_str(), allowGPUOpt);
vkDestroyImage(dev, img, nullptr);
vkFreeMemory(dev, mem, nullptr);
}
vkDestroyDevice(dev, nullptr);
vkDestroyInstance(inst, nullptr);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment