Last active
October 1, 2023 08:14
-
-
Save mlaves/61bc47956b0da377ccd0eb4a92c28904 to your computer and use it in GitHub Desktop.
Example of coremltools segfault on larger input sizes.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "d4eca7b9-b38c-45ff-b3af-f63fc4d656be", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"scikit-learn version 1.3.0 is not supported. Minimum required version: 0.17. Maximum required version: 1.1.2. Disabling scikit-learn conversion API.\n", | |
"TensorFlow version 2.13.0 has not been tested with coremltools. You may run into unexpected errors. TensorFlow 2.12.0 is the most recent version that has been tested.\n", | |
"Torch version 2.0.1 has not been tested with coremltools. You may run into unexpected errors. Torch 2.0.0 is the most recent version that has been tested.\n" | |
] | |
} | |
], | |
"source": [ | |
"import coremltools as ct\n", | |
"import torch\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "51b0e10a-6a40-4026-b021-cf18f7297ec9", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from typing import Sequence, Optional\n", | |
"from itertools import pairwise\n", | |
"\n", | |
"\n", | |
"class UNet3D(torch.nn.Module):\n", | |
" def __init__(\n", | |
" self,\n", | |
" in_channels: int = 1,\n", | |
" num_classes: int = 2,\n", | |
" num_filters: Sequence[int] = (32, 64, 128, 256, 320),\n", | |
" ) -> None:\n", | |
" super().__init__()\n", | |
" self._in_channels = in_channels\n", | |
" self._num_classes = num_classes\n", | |
" self._num_filters = num_filters\n", | |
"\n", | |
" self._init_conv = UNet3DBlock(self._in_channels, num_filters[0], stride=1)\n", | |
"\n", | |
" self._encoder_list = torch.nn.ModuleList([\n", | |
" UNet3DBlock(in_f, out_f, stride=2)\n", | |
" for in_f, out_f in pairwise(self._num_filters)])\n", | |
"\n", | |
" # bottleneck\n", | |
" self._encoder_list.append(UNet3DUpBlock(num_filters[-1], num_filters[-1], num_filters[-1], stride=2))\n", | |
"\n", | |
" self._decoder_list = torch.nn.ModuleList([\n", | |
" UNet3DUpBlock(2*in_f, in_f, out_f)\n", | |
" for in_f, out_f in pairwise(reversed(self._num_filters))])\n", | |
"\n", | |
" self._decoder_list.append(UNet3DUpBlock(num_filters[1], num_filters[0], num_filters[0], stride=1, tconv=False))\n", | |
" self._final_conv = torch.nn.Conv3d(num_filters[0], num_classes, kernel_size=1, padding=0)\n", | |
"\n", | |
" def encoder(self, x: torch.Tensor) -> Sequence[torch.Tensor]:\n", | |
" skips = [self._init_conv(x)]\n", | |
" for enc in self._encoder_list:\n", | |
" skips.append(enc(skips[-1]))\n", | |
"\n", | |
" return skips\n", | |
"\n", | |
" def decoder(self, skips: Sequence[torch.Tensor]) -> torch.Tensor:\n", | |
" x = skips[-1]\n", | |
" skips = skips[:-1][::-1]\n", | |
" for i, dec in enumerate(self._decoder_list):\n", | |
" x = dec(x, skips[i])\n", | |
"\n", | |
" return x\n", | |
"\n", | |
" def forward(self, x: torch.Tensor) -> torch.Tensor:\n", | |
" skips = self.encoder(x)\n", | |
" y = self.decoder(skips)\n", | |
" y = self._final_conv(y)\n", | |
"\n", | |
" return y\n", | |
"\n", | |
"\n", | |
"class UNet3DBlock(torch.nn.Module):\n", | |
" def __init__(\n", | |
" self,\n", | |
" in_channels: int,\n", | |
" out_channels: int,\n", | |
" stride: int = 1\n", | |
" ) -> None:\n", | |
" super().__init__()\n", | |
" self._conv0 = torch.nn.Conv3d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)\n", | |
" self._conv1 = torch.nn.Conv3d(out_channels, out_channels, kernel_size=3, padding=1)\n", | |
" self._act_fn = torch.nn.LeakyReLU(negative_slope=0.01, inplace=True)\n", | |
"\n", | |
" def forward(self, x: torch.Tensor) -> torch.Tensor:\n", | |
" y = self._act_fn(self._conv0(x))\n", | |
" y = self._act_fn(self._conv1(y))\n", | |
" return y\n", | |
"\n", | |
"\n", | |
"class UNet3DUpBlock(torch.nn.Module):\n", | |
" def __init__(\n", | |
" self,\n", | |
" in_channels: int,\n", | |
" hidden_channels: int,\n", | |
" out_channels: int,\n", | |
" stride: int = 1,\n", | |
" tconv: bool = True,\n", | |
" ) -> None:\n", | |
" super().__init__()\n", | |
" self._block = UNet3DBlock(in_channels, hidden_channels, stride)\n", | |
" self._tconv = torch.nn.ConvTranspose3d(\n", | |
" hidden_channels, out_channels, kernel_size=2, stride=2\n", | |
" ) if tconv else torch.nn.Identity()\n", | |
"\n", | |
" def forward(self, x, skip=None):\n", | |
" if skip is not None:\n", | |
" x = torch.cat([x, skip], dim=1)\n", | |
" \n", | |
" x = self._block(x)\n", | |
" x = self._tconv(x)\n", | |
"\n", | |
" return x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "ea2e8e32-7027-414e-a95f-4f582c7fa127", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"torch.Size([1, 25, 128, 128, 128])" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"net = UNet3D(in_channels=1, num_classes=25)\n", | |
"net.eval()\n", | |
"x = torch.randn(1, 1, 128, 128, 128)\n", | |
"net(x).shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "c04bf7d7-4ae7-4832-a495-7a23e100b760", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"torch.Size([1, 25, 128, 128, 128])" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"traced = torch.jit.trace(net, x)\n", | |
"net(x).shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "d811acca-8d61-485d-8dd5-67c643793664", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"Converting PyTorch Frontend ==> MIL Ops: 100%|▉| 239/240 [00:00<00:00, 2034.62 o\n", | |
"Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 1954.84 passes/\n", | |
"Running MIL default pipeline: 100%|███████| 63/63 [00:00<00:00, 618.89 passes/s]\n", | |
"Running MIL backend_neuralnetwork pipeline: 100%|█| 9/9 [00:00<00:00, 2637.74 pa\n", | |
"Translating MIL ==> NeuralNetwork Ops: 100%|█| 251/251 [00:01<00:00, 138.25 ops/\n" | |
] | |
} | |
], | |
"source": [ | |
"input_shape = ct.Shape(shape=(1,\n", | |
" 1,\n", | |
" ct.RangeDim(lower_bound=32, upper_bound=128, default=64),\n", | |
" ct.RangeDim(lower_bound=32, upper_bound=128, default=64),\n", | |
" ct.RangeDim(lower_bound=32, upper_bound=128, default=64)))\n", | |
"\n", | |
"mlmodel = ct.convert(\n", | |
" traced,\n", | |
" inputs=[ct.TensorType(name=\"input\", shape=input_shape, dtype=np.float32)],\n", | |
" outputs=[ct.TensorType(name=\"output\")],\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "0e6aba2d-5115-4749-8c60-98a54875b733", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(1, 25, 64, 64, 64)" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.rand(1, 1, 64, 64, 64) # works fine\n", | |
"mlmodel.predict({\"input\": x.numpy()})[\"output\"].shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "c2ce6cbd-cdf3-48e1-a65b-31fec57b41f9", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(1, 25, 96, 96, 96)" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.rand(1, 1, 96, 96, 96) # doesn't crash, but orders of magnitude slower\n", | |
"mlmodel.predict({\"input\": x.numpy()})[\"output\"].shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "7e1ef162-192c-4f1b-b3a4-e1bfa104a471", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = torch.rand(1, 1, 128, 128, 128) # segfault\n", | |
"mlmodel.predict({\"input\": x.numpy()})[\"output\"].shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "edb99029-c048-4073-86f8-6d8f1358bac9", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Process 1528 launched: '/Users/laves/miniforge3/envs/py310/bin/python' (arm64) | |
scikit-learn version 1.3.0 is not supported. Minimum required version: 0.17. Maximum required version: 1.1.2. Disabling scikit-learn conversion API. | |
TensorFlow version 2.13.0 has not been tested with coremltools. You may run into unexpected errors. TensorFlow 2.12.0 is the most recent version that has been tested. | |
Torch version 2.0.1 has not been tested with coremltools. You may run into unexpected errors. Torch 2.0.0 is the most recent version that has been tested. | |
2023-10-01 10:13:19.894994+0200 python[1528:19252] dynamic_cast error 2: One or more of the following type_info's has hidden visibility or is defined in more than one translation unit. They should all have public visibility. N5torch3jit14AttributeValueE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE. | |
2023-10-01 10:13:19.895120+0200 python[1528:19252] dynamic_cast error 2: One or more of the following type_info's has hidden visibility or is defined in more than one translation unit. They should all have public visibility. N5torch3jit14AttributeValueE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE. | |
2023-10-01 10:13:19.895168+0200 python[1528:19252] dynamic_cast error 2: One or more of the following type_info's has hidden visibility or is defined in more than one translation unit. They should all have public visibility. N5torch3jit14AttributeValueE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE. | |
2023-10-01 10:13:19.895234+0200 python[1528:19252] dynamic_cast error 2: One or more of the following type_info's has hidden visibility or is defined in more than one translation unit. They should all have public visibility. N5torch3jit14AttributeValueE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE. | |
2023-10-01 10:13:19.895341+0200 python[1528:19252] dynamic_cast error 2: One or more of the following type_info's has hidden visibility or is defined in more than one translation unit. They should all have public visibility. N5torch3jit14AttributeValueE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE. | |
2023-10-01 10:13:19.895551+0200 python[1528:19252] dynamic_cast error 2: One or more of the following type_info's has hidden visibility or is defined in more than one translation unit. They should all have public visibility. N5torch3jit14AttributeValueE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE. | |
2023-10-01 10:13:19.896695+0200 python[1528:19252] dynamic_cast error 2: One or more of the following type_info's has hidden visibility or is defined in more than one translation unit. They should all have public visibility. N5torch3jit14AttributeValueE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE. | |
2023-10-01 10:13:19.901464+0200 python[1528:19252] dynamic_cast error 2: One or more of the following type_info's has hidden visibility or is defined in more than one translation unit. They should all have public visibility. N5torch3jit14AttributeValueE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE. | |
2023-10-01 10:13:19.908130+0200 python[1528:19252] dynamic_cast error 2: One or more of the following type_info's has hidden visibility or is defined in more than one translation unit. They should all have public visibility. N5torch3jit14AttributeValueE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE, N5torch3jit20ScalarAttributeValueINSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEELNS0_13AttributeKindE6EEE. | |
2023-10-01 10:13:19.917683+0200 python[1528:19252] dynamic_cast error 2: One or more of the following type_info's has hidden visibility or is defined in more than one translation unit. They should all have public visibility. N5torch3jit14AttributeValueE, N5torch3jit20ScalarAttributeValueIxLNS0_13AttributeKindE4EEE, N5torch3jit20ScalarAttributeValueIxLNS0_13AttributeKindE4EEE. | |
Converting PyTorch Frontend ==> MIL Ops: 100% 239/240 [00:00<00:00, 5977.04 ops/s] | |
Running MIL frontend_pytorch pipeline: 100% 5/5 [00:00<00:00, 1876.31 passes/s] | |
Running MIL default pipeline: 100% 63/63 [00:00<00:00, 669.39 passes/s] | |
Running MIL backend_neuralnetwork pipeline: 100% 9/9 [00:00<00:00, 2604.08 passes/s] | |
Translating MIL ==> NeuralNetwork Ops: 100% 251/251 [00:01<00:00, 175.94 ops/s] | |
Process 1528 stopped | |
* thread #1, queue = 'com.apple.CoreMLBatchProcessingQueue', stop reason = EXC_BAD_ACCESS (code=2, address=0x2c1000000) | |
frame #0: 0x000000019842150c Espresso`Espresso::conv3d_kernel_cpu::__launch(std::__1::shared_ptr<Espresso::abstract_batch> const&, Espresso::layer_data&, Espresso::layer_data&) + 1524 | |
Espresso`Espresso::conv3d_kernel_cpu::__launch: | |
-> 0x19842150c <+1524>: str wzr, [x8, w9, sxtw #2] | |
0x198421510 <+1528>: add w9, w9, #0x1 | |
0x198421514 <+1532>: subs x10, x10, #0x1 | |
0x198421518 <+1536>: b.ne 0x19842150c ; <+1524> | |
Target 0: (python) stopped. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment