Example of a coremltools segfault on larger input sizes.
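The notebook below defines a small 3D U-Net in PyTorch, traces it with torch.jit.trace, converts it with coremltools using flexible input sizes (ct.RangeDim between 32 and 128 for each spatial dimension), and then calls predict on inputs of increasing size: 64x64x64 works, 96x96x96 runs but is orders of magnitude slower, and 128x128x128 crashes the process. The debugger trace at the end of this gist shows the crash as an EXC_BAD_ACCESS inside Espresso::conv3d_kernel_cpu::__launch.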
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "d4eca7b9-b38c-45ff-b3af-f63fc4d656be",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"scikit-learn version 1.3.0 is not supported. Minimum required version: 0.17. Maximum required version: 1.1.2. Disabling scikit-learn conversion API.\n",
"TensorFlow version 2.13.0 has not been tested with coremltools. You may run into unexpected errors. TensorFlow 2.12.0 is the most recent version that has been tested.\n",
"Torch version 2.0.1 has not been tested with coremltools. You may run into unexpected errors. Torch 2.0.0 is the most recent version that has been tested.\n"
]
}
],
"source": [
"import coremltools as ct\n",
"import torch\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "51b0e10a-6a40-4026-b021-cf18f7297ec9",
"metadata": {},
"outputs": [],
"source": [
"from typing import Sequence, Optional\n",
"from itertools import pairwise\n",
"\n",
"\n",
"class UNet3D(torch.nn.Module):\n",
" def __init__(\n",
" self,\n",
" in_channels: int = 1,\n",
" num_classes: int = 2,\n",
" num_filters: Sequence[int] = (32, 64, 128, 256, 320),\n",
" ) -> None:\n",
" super().__init__()\n",
" self._in_channels = in_channels\n",
" self._num_classes = num_classes\n",
" self._num_filters = num_filters\n",
"\n",
" self._init_conv = UNet3DBlock(self._in_channels, num_filters[0], stride=1)\n",
"\n",
" self._encoder_list = torch.nn.ModuleList([\n",
" UNet3DBlock(in_f, out_f, stride=2)\n",
" for in_f, out_f in pairwise(self._num_filters)])\n",
"\n",
" # bottleneck\n",
" self._encoder_list.append(UNet3DUpBlock(num_filters[-1], num_filters[-1], num_filters[-1], stride=2))\n",
"\n",
" self._decoder_list = torch.nn.ModuleList([\n",
" UNet3DUpBlock(2*in_f, in_f, out_f)\n",
" for in_f, out_f in pairwise(reversed(self._num_filters))])\n",
"\n",
" self._decoder_list.append(UNet3DUpBlock(num_filters[1], num_filters[0], num_filters[0], stride=1, tconv=False))\n",
" self._final_conv = torch.nn.Conv3d(num_filters[0], num_classes, kernel_size=1, padding=0)\n",
"\n",
" def encoder(self, x: torch.Tensor) -> Sequence[torch.Tensor]:\n",
" skips = [self._init_conv(x)]\n",
" for enc in self._encoder_list:\n",
" skips.append(enc(skips[-1]))\n",
"\n",
" return skips\n",
"\n",
" def decoder(self, skips: Sequence[torch.Tensor]) -> torch.Tensor:\n",
" x = skips[-1]\n",
" skips = skips[:-1][::-1]\n",
" for i, dec in enumerate(self._decoder_list):\n",
" x = dec(x, skips[i])\n",
"\n",
" return x\n",
"\n",
" def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
" skips = self.encoder(x)\n",
" y = self.decoder(skips)\n",
" y = self._final_conv(y)\n",
"\n",
" return y\n",
"\n",
"\n",
"class UNet3DBlock(torch.nn.Module):\n",
" def __init__(\n",
" self,\n",
" in_channels: int,\n",
" out_channels: int,\n",
" stride: int = 1\n",
" ) -> None:\n",
" super().__init__()\n",
" self._conv0 = torch.nn.Conv3d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)\n",
" self._conv1 = torch.nn.Conv3d(out_channels, out_channels, kernel_size=3, padding=1)\n",
" self._act_fn = torch.nn.LeakyReLU(negative_slope=0.01, inplace=True)\n",
"\n",
" def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
" y = self._act_fn(self._conv0(x))\n",
" y = self._act_fn(self._conv1(y))\n",
" return y\n",
"\n",
"\n",
"class UNet3DUpBlock(torch.nn.Module):\n",
" def __init__(\n",
" self,\n",
" in_channels: int,\n",
" hidden_channels: int,\n",
" out_channels: int,\n",
" stride: int = 1,\n",
" tconv: bool = True,\n",
" ) -> None:\n",
" super().__init__()\n",
" self._block = UNet3DBlock(in_channels, hidden_channels, stride)\n",
" self._tconv = torch.nn.ConvTranspose3d(\n",
" hidden_channels, out_channels, kernel_size=2, stride=2\n",
" ) if tconv else torch.nn.Identity()\n",
"\n",
" def forward(self, x, skip=None):\n",
" if skip is not None:\n",
" x = torch.cat([x, skip], dim=1)\n",
" \n",
" x = self._block(x)\n",
" x = self._tconv(x)\n",
"\n",
" return x"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "ea2e8e32-7027-414e-a95f-4f582c7fa127",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([1, 25, 128, 128, 128])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"net = UNet3D(in_channels=1, num_classes=25)\n",
"net.eval()\n",
"x = torch.randn(1, 1, 128, 128, 128)\n",
"net(x).shape"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c04bf7d7-4ae7-4832-a495-7a23e100b760",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([1, 25, 128, 128, 128])"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"traced = torch.jit.trace(net, x)\n",
"net(x).shape"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "d811acca-8d61-485d-8dd5-67c643793664",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Converting PyTorch Frontend ==> MIL Ops: 100%|▉| 239/240 [00:00<00:00, 2034.62 o\n",
"Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 1954.84 passes/\n",
"Running MIL default pipeline: 100%|███████| 63/63 [00:00<00:00, 618.89 passes/s]\n",
"Running MIL backend_neuralnetwork pipeline: 100%|█| 9/9 [00:00<00:00, 2637.74 pa\n",
"Translating MIL ==> NeuralNetwork Ops: 100%|█| 251/251 [00:01<00:00, 138.25 ops/\n"
]
}
],
"source": [
"input_shape = ct.Shape(shape=(1,\n",
" 1,\n",
" ct.RangeDim(lower_bound=32, upper_bound=128, default=64),\n",
" ct.RangeDim(lower_bound=32, upper_bound=128, default=64),\n",
" ct.RangeDim(lower_bound=32, upper_bound=128, default=64)))\n",
"\n",
"mlmodel = ct.convert(\n",
" traced,\n",
" inputs=[ct.TensorType(name=\"input\", shape=input_shape, dtype=np.float32)],\n",
" outputs=[ct.TensorType(name=\"output\")],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "0e6aba2d-5115-4749-8c60-98a54875b733",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1, 25, 64, 64, 64)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.rand(1, 1, 64, 64, 64) # works fine\n",
"mlmodel.predict({\"input\": x.numpy()})[\"output\"].shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c2ce6cbd-cdf3-48e1-a65b-31fec57b41f9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1, 25, 96, 96, 96)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = torch.rand(1, 1, 96, 96, 96) # doesn't crash, but orders of magnitude slower\n",
"mlmodel.predict({\"input\": x.numpy()})[\"output\"].shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e1ef162-192c-4f1b-b3a4-e1bfa104a471",
"metadata": {},
"outputs": [],
"source": [
"x = torch.rand(1, 1, 128, 128, 128) # segfault\n",
"mlmodel.predict({\"input\": x.numpy()})[\"output\"].shape"
]
},
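{
"cell_type": "markdown",
"id": "added-fixed-shape-note",
"metadata": {},
"source": [
"Added diagnostic sketch, not part of the original crash report: convert the same traced model with a fixed (1, 1, 128, 128, 128) input instead of the flexible ct.RangeDim shape, to check whether the 128x128x128 crash is specific to the flexible-shape path. (An alternative worth trying is ct.EnumeratedShapes with only the handful of sizes actually needed.) Whether either variant avoids the crash is an assumption to verify, not an observed result."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "added-fixed-shape-check",
"metadata": {},
"outputs": [],
"source": [
"# Added diagnostic sketch (hypothetical, not part of the original report):\n",
"# convert with a fixed 128^3 input shape instead of ct.RangeDim and predict on a\n",
"# 128^3 tensor. If this succeeds, the segfault above is likely tied to the\n",
"# flexible-shape (RangeDim) code path rather than to 3D convolutions per se.\n",
"mlmodel_fixed = ct.convert(\n",
"    traced,\n",
"    inputs=[ct.TensorType(name=\"input\", shape=(1, 1, 128, 128, 128), dtype=np.float32)],\n",
"    outputs=[ct.TensorType(name=\"output\")],\n",
")\n",
"\n",
"x = torch.rand(1, 1, 128, 128, 128)\n",
"mlmodel_fixed.predict({\"input\": x.numpy()})[\"output\"].shape"
]
},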
{
"cell_type": "code",
"execution_count": null,
"id": "edb99029-c048-4073-86f8-6d8f1358bac9",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
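The transcript below shows the same steps run as a plain Python script under a debugger (the "Process ... launched" and "Target 0: (python) stopped." lines are characteristic of lldb). Conversion completes with the same warnings and progress output as in the notebook, and the subsequent predict on a 128x128x128 input stops the process with EXC_BAD_ACCESS in Espresso's conv3d_kernel_cpu.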
Process 1528 launched: '/Users/laves/miniforge3/envs/py310/bin/python' (arm64)
scikit-learn version 1.3.0 is not supported. Minimum required version: 0.17. Maximum required version: 1.1.2. Disabling scikit-learn conversion API.
TensorFlow version 2.13.0 has not been tested with coremltools. You may run into unexpected errors. TensorFlow 2.12.0 is the most recent version that has been tested.
Torch version 2.0.1 has not been tested with coremltools. You may run into unexpected errors. Torch 2.0.0 is the most recent version that has been tested.
2023-10-01 10:13:19.894994+0200 python[1528:19252] dynamic_cast error 2: One or more of the following type_info's has hidden visibility or is defined in more than one translation unit. They should all have public visibility. N5torch3jit14AttributeValueE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE.
[the dynamic_cast warning above is repeated eight more times between 10:13:19.895120 and 10:13:19.908130; only the timestamps differ]
2023-10-01 10:13:19.917683+0200 python[1528:19252] dynamic_cast error 2: One or more of the following type_info's has hidden visibility or is defined in more than one translation unit. They should all have public visibility. N5torch3jit14AttributeValueE, N5torch3jit20ScalarAttributeValueIxLNS0_13AttributeKindE4EEE, N5torch3jit20ScalarAttributeValueIxLNS0_13AttributeKindE4EEE.
Converting PyTorch Frontend ==> MIL Ops: 100% 239/240 [00:00<00:00, 5977.04 ops/s]
Running MIL frontend_pytorch pipeline: 100% 5/5 [00:00<00:00, 1876.31 passes/s]
Running MIL default pipeline: 100% 63/63 [00:00<00:00, 669.39 passes/s]
Running MIL backend_neuralnetwork pipeline: 100% 9/9 [00:00<00:00, 2604.08 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100% 251/251 [00:01<00:00, 175.94 ops/s]
Process 1528 stopped
* thread #1, queue = 'com.apple.CoreMLBatchProcessingQueue', stop reason = EXC_BAD_ACCESS (code=2, address=0x2c1000000)
frame #0: 0x000000019842150c Espresso`Espresso::conv3d_kernel_cpu::__launch(std::__1::shared_ptr<Espresso::abstract_batch> const&, Espresso::layer_data&, Espresso::layer_data&) + 1524
Espresso`Espresso::conv3d_kernel_cpu::__launch:
-> 0x19842150c <+1524>: str wzr, [x8, w9, sxtw #2]
0x198421510 <+1528>: add w9, w9, #0x1
0x198421514 <+1532>: subs x10, x10, #0x1
0x198421518 <+1536>: b.ne 0x19842150c ; <+1524>
Target 0: (python) stopped.