@otmb · Last active March 29, 2025
Convert YOLOv9 to Core ML

Environment

  • Python 3.11.9

Installation

Check out PR #174 of MultimediaTechLab/YOLO, install the dependencies, replace yolo/utils/export_utils.py with the patched version from this gist, then run the export task:

$ git clone https://github.com/MultimediaTechLab/YOLO.git
$ cd YOLO
$ git fetch origin pull/174/head:pr174
$ git checkout pr174
$ pip install -r requirements.txt
$ pip install coremltools==8.2
$ curl -LO https://gist.githubusercontent.com/otmb/a6867efe8a93b82ef650ce3d20f94b21/raw/922dfd0df1118b74ec8322d7ac1485780c9695c1/export_utils.py
$ cp export_utils.py yolo/utils/export_utils.py
$ python yolo/lazy.py task=export name=ExportCoreml model=v9-s task.format=coreml

Converted model

YOLOv9-s Core ML model (v9-s-x.mlpackage, detection + NMS pipeline)
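Once exported, the combined pipeline can be sanity-checked from Python before it goes into an app. A minimal sketch, assuming the export above wrote v9-s-x.mlpackage to the working directory (inspecting the spec works wherever coremltools runs; prediction requires macOS):

import coremltools as ct

# Load the detection + NMS pipeline written by combineModelsAndExport below.
model = ct.models.MLModel("v9-s-x.mlpackage")

# The pipeline should declare three inputs (image, iouThreshold,
# confidenceThreshold) and two outputs (confidence, coordinates).
print(model.get_spec().description)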

Sample app (SwiftUI)

import SwiftUI
import Vision

struct ContentView: View {
    @StateObject var detection = ObjectDetection()
    var uiImage = UIImage(named: "dog.jpg")
    @State var image: UIImage?

    var body: some View {
        VStack {
            if let image = image {
                Image(uiImage: image).resizable()
                    .aspectRatio(contentMode: .fit)
            }
        }
        .onAppear {
            predict()
        }
    }

    func predict() {
        do {
            if let uiImage = uiImage {
                // Uncomment to benchmark 100 predictions:
                // let start = Date()
                // for _ in 0..<100 {
                //     _ = try detection.prediction(uiImage: uiImage)
                // }
                // print(Date().timeIntervalSince(start))
                _ = try detection.prediction(uiImage: uiImage)
                if let img = detection.dstImage {
                    self.image = img
                }
            }
        } catch {
            print(error)
        }
    }
}

class BBox {
    var label = ""
    var box = CGRect.zero
    var score = 0.0
}

class ObjectDetection: ObservableObject {
    let modelName = "v9-s-x"
    private var requests = [VNRequest]()
    var originalImage: UIImage? = nil
    var dstImage: UIImage? = nil
    var bbox = [BBox]()

    init() {
        if let error = setupVision() {
            print(error.localizedDescription)
        }
    }

    @discardableResult
    func setupVision() -> NSError? {
        guard let modelURL = Bundle.main.url(forResource: modelName, withExtension: "mlmodelc") else {
            return NSError(domain: "Model file is missing.", code: -1)
        }
        do {
            let config = MLModelConfiguration()
            config.computeUnits = .cpuAndGPU
            let visionModel = try VNCoreMLModel(for: MLModel(contentsOf: modelURL, configuration: config))
            let request = VNCoreMLRequest(model: visionModel, completionHandler: visionObjectDetectionResults)
            request.imageCropAndScaleOption = .scaleFit
            requests = [request]
        } catch let error as NSError {
            print("Model loading went wrong: \(error)")
            return error
        }
        return nil
    }

    func runCoreML(uiImage: UIImage, orientation: CGImagePropertyOrientation) throws {
        let cgImage = uiImage.cgImage!
        let requestHandler = VNImageRequestHandler(cgImage: cgImage,
                                                   orientation: orientation, options: [:])
        try requestHandler.perform(requests)
    }

    func visionObjectDetectionResults(request: VNRequest, error: Error?) {
        guard let observations = request.results as? [VNRecognizedObjectObservation] else { fatalError() }
        guard let uiImage = originalImage else { return }
        var bbox = [BBox]()
        for observation in observations {
            let width = uiImage.size.width
            let height = uiImage.size.height
            let bufferSize = CGSize(width: width, height: height)
            let box = BBox()
            if let topLabel = observation.topLabel {
                box.label = topLabel.identifier
                box.score = Double(topLabel.confidence)
            }
            // Vision returns normalized rects with a bottom-left origin;
            // flip to UIKit's top-left origin before scaling to pixels.
            box.box = VNImageRectForNormalizedRect(
                observation.boundingBox.flipped, Int(bufferSize.width), Int(bufferSize.height))
            bbox.append(box)
        }
        // Render the original image with the boxes above the confidence cutoff.
        let dstImageSize = uiImage.size
        let dstImageFormat = UIGraphicsImageRendererFormat()
        dstImageFormat.scale = 1
        let renderer = UIGraphicsImageRenderer(size: dstImageSize,
                                               format: dstImageFormat)
        let dstImage = renderer.image { rendererContext in
            draw(image: uiImage.cgImage!, in: rendererContext.cgContext)
            for box in bbox {
                if box.score < 0.5 {
                    continue
                }
                draw(rect: box.box, in: rendererContext.cgContext)
            }
        }
        self.bbox = bbox
        self.dstImage = dstImage
    }

    func draw(image: CGImage, in cgContext: CGContext) {
        // CGContext draws with a bottom-left origin, so flip vertically first.
        cgContext.saveGState()
        cgContext.scaleBy(x: 1.0, y: -1.0)
        let drawingRect = CGRect(x: 0, y: -image.height, width: image.width, height: image.height)
        cgContext.draw(image, in: drawingRect)
        cgContext.restoreGState()
    }

    var boxColor: UIColor = UIColor.white
    var boxLineWidth: CGFloat = 2

    private func draw(rect box: CGRect, in cgContext: CGContext) {
        cgContext.setStrokeColor(boxColor.cgColor)
        cgContext.setLineWidth(boxLineWidth)
        cgContext.addRect(box)
        cgContext.strokePath()
    }

    func prediction(uiImage: UIImage, orientation: CGImagePropertyOrientation = .up) throws -> [BBox] {
        self.originalImage = uiImage
        try runCoreML(uiImage: uiImage, orientation: orientation)
        return self.bbox
    }
}

extension VNRecognizedObjectObservation {
    // The label with the highest confidence for this observation.
    var topLabel: VNClassificationObservation? {
        self.labels.max { $0.confidence < $1.confidence }
    }
}

extension CGRect {
    // Convert a normalized rect from a bottom-left to a top-left origin.
    var flipped: CGRect {
        return CGRect(x: origin.x,
                      y: 1 - origin.y - height,
                      width: width, height: height)
    }
}
export_utils.py

from pathlib import Path
from typing import Dict, List, Optional

from yolo.config.config import Config
from yolo.model.yolo import YOLO
from yolo.utils.logger import logger


class ModelExporter:
    def __init__(self, cfg: Config, model: YOLO, format: str, model_path: Optional[str] = None):
        self.model = model
        self.cfg = cfg
        self.class_num = cfg.dataset.class_num
        self.format = format
        if cfg.weight == True:
            cfg.weight = Path("weights") / f"{cfg.model.name}.pt"
        if model_path:
            self.model_path = model_path
        else:
            extension = self.format
            if self.format == "coreml":
                extension = "mlpackage"
            self.model_path = f"{Path(self.cfg.weight).stem}.{extension}"

        self.output_names: List[str] = [
            "1_class_scores_small",
            "2_box_features_small",
            "3_bbox_deltas_small",
            "4_class_scores_medium",
            "5_box_features_medium",
            "6_bbox_deltas_medium",
            "7_class_scores_large",
            "8_box_features_large",
            "9_bbox_deltas_large",
        ]

    def export_onnx(self, dynamic_axes: Optional[Dict[str, Dict[int, str]]] = None, model_path: Optional[str] = None):
        logger.info(":package: Exporting model to onnx format")
        import torch

        dummy_input = torch.ones((1, 3, *self.cfg.image_size))
        if model_path:
            onnx_model_path = model_path
        else:
            onnx_model_path = self.model_path
        torch.onnx.export(
            self.model,
            dummy_input,
            onnx_model_path,
            input_names=["input"],
            output_names=self.output_names,
            dynamic_axes=dynamic_axes,
        )
        logger.info(f":inbox_tray: ONNX model saved to {onnx_model_path}")
        return onnx_model_path

    def export_tflite(self):
        logger.info(":package: Exporting model to tflite format")
        import torch

        self.model.eval()
        example_inputs = (torch.rand(1, 3, *self.cfg.image_size),)
        import ai_edge_torch

        edge_model = ai_edge_torch.convert(self.model, example_inputs)
        edge_model.export(self.model_path)
        logger.info(":white_check_mark: Model exported to tflite format")

    def export_coreml(self):
        logger.info(":package: Exporting model to coreml format")
        import torch

        self.model.eval()
        example_inputs = (torch.rand(1, 3, *self.cfg.image_size),)
        model = CoremlPostProcess(self.model, example_inputs[0])
        model_output = model(example_inputs[0])
        exported_program = torch.jit.trace(model, example_inputs, strict=True)

        import logging

        import coremltools as ct

        # Convert to a Core ML program using the Unified Conversion API.
        logging.getLogger("coremltools").disabled = True
        model_from_export = ct.convert(
            exported_program,
            inputs=[ct.ImageType("image", shape=example_inputs[0].shape,
                                 scale=1 / 255., bias=[0, 0, 0])],
            convert_to="mlprogram",
            minimum_deployment_target=ct.target.iOS15,
            compute_precision=ct.precision.FLOAT16,
        )
        model_from_export.save(self.model_path)
        logger.info(f":white_check_mark: Model exported to coreml format {self.model_path}")

        # Append the NMS model and export the combined pipeline.
        mlmodel = model_from_export
        modelSpec = mlmodel.get_spec()
        nmsSpec = createNmsModelSpec(modelSpec, model_output)
        experiment_name = "v9-s-x.mlpackage"
        test_size = (640, 640)
        combineModelsAndExport(
            mlmodel, modelSpec, nmsSpec, experiment_name, test_size)


import torch
from torch import nn

from yolo.utils.bounding_box_utils import transform_bbox
import coremltools as ct


class CoremlPostProcess(nn.Module):
    def __init__(self, model, im):
        _, _, h, w = im.shape
        super().__init__()
        self.model = model
        self.normalize = torch.tensor([1.0 / w, 1.0 / h, 1.0 / w, 1.0 / h])

    def forward(self, x):
        prediction = self.model(x)
        pred_class, _, pred_bbox = prediction[:3]
        pred_conf = prediction[3] if len(prediction) == 4 else None
        pred_class = pred_class.sigmoid() * (1 if pred_conf is None else pred_conf)
        # Core ML's NMS expects normalized [x_center, y_center, width, height] boxes.
        pred_bbox = transform_bbox(pred_bbox, "xyxy -> xycwh")
        return pred_class[0], pred_bbox[0] * self.normalize


classLabels = (
    'Person', 'Bicycle', 'Car', 'Motorcycle', 'Airplane', 'Bus', 'Train', 'Truck', 'Boat',
    'Traffic light', 'Fire hydrant', 'Stop sign', 'Parking meter', 'Bench', 'Bird', 'Cat',
    'Dog', 'Horse', 'Sheep', 'Cow', 'Elephant', 'Bear', 'Zebra', 'Giraffe', 'Backpack',
    'Umbrella', 'Handbag', 'Tie', 'Suitcase', 'Frisbee', 'Skis', 'Snowboard', 'Sports ball',
    'Kite', 'Baseball bat', 'Baseball glove', 'Skateboard', 'Surfboard', 'Tennis racket',
    'Bottle', 'Wine glass', 'Cup', 'Fork', 'Knife', 'Spoon', 'Bowl', 'Banana', 'Apple',
    'Sandwich', 'Orange', 'Broccoli', 'Carrot', 'Hot dog', 'Pizza', 'Donut', 'Cake', 'Chair',
    'Couch', 'Potted plant', 'Bed', 'Dining table', 'Toilet', 'Tv', 'Laptop', 'Mouse',
    'Remote', 'Keyboard', 'Cell phone', 'Microwave', 'Oven', 'Toaster', 'Sink',
    'Refrigerator', 'Book', 'Clock', 'Vase', 'Scissors', 'Teddy bear', 'Hair drier',
    'Toothbrush')
iouThreshold = 0.6
confidenceThreshold = 0.1
specificationVersion = 7
numberOfClassLabels = 80
model_name = "yolo-v9-s"


def createNmsModelSpec(modelSpec, model_output):
    '''
    Create a Core ML model with NMS to filter the results of the model.
    '''
    nmsSpec = ct.proto.Model_pb2.Model()
    nmsSpec.specificationVersion = 6
    out0, out1 = iter(modelSpec.description.output)
    out0_shape = tuple(model_output[0].size())
    out1_shape = tuple(model_output[1].size())
    out0.type.multiArrayType.shape[:] = out0_shape
    out1.type.multiArrayType.shape[:] = out1_shape

    # Define the inputs and outputs of the model
    for i in range(2):
        nnOutput = modelSpec.description.output[i].SerializeToString()
        nmsSpec.description.input.add()
        nmsSpec.description.input[i].ParseFromString(nnOutput)
        nmsSpec.description.output.add()
        nmsSpec.description.output[i].ParseFromString(nnOutput)
    nmsSpec.description.output[0].name = "confidence"
    nmsSpec.description.output[1].name = "coordinates"

    # Define the output shapes of the model
    outputSizes = [numberOfClassLabels, 4]
    for i in range(len(outputSizes)):
        maType = nmsSpec.description.output[i].type.multiArrayType
        # The first dimension of both outputs is the number of boxes, which should be flexible
        maType.shapeRange.sizeRanges.add()
        maType.shapeRange.sizeRanges[0].lowerBound = 0
        maType.shapeRange.sizeRanges[0].upperBound = -1
        # The second dimension is fixed: for "confidence" it is the number of classes,
        # for "coordinates" it is position (x, y) and size (w, h)
        maType.shapeRange.sizeRanges.add()
        maType.shapeRange.sizeRanges[1].lowerBound = outputSizes[i]
        maType.shapeRange.sizeRanges[1].upperBound = outputSizes[i]
        del maType.shape[:]

    # Define the model type: non-maximum suppression
    nms = nmsSpec.nonMaximumSuppression
    nms.confidenceInputFeatureName = out0.name
    nms.coordinatesInputFeatureName = out1.name
    nms.confidenceOutputFeatureName = "confidence"
    nms.coordinatesOutputFeatureName = "coordinates"
    nms.iouThresholdInputFeatureName = "iouThreshold"
    nms.confidenceThresholdInputFeatureName = "confidenceThreshold"
    # Sensible defaults for the two additional inputs; they can be overridden when using the model
    nms.iouThreshold = iouThreshold
    nms.confidenceThreshold = confidenceThreshold
    nms.stringClassLabels.vector.extend(classLabels)
    return nmsSpec


def combineModelsAndExport(mlmodel, modelSpec, nmsSpec, fileName, test_size):
    '''
    Combine the Core ML model with the NMS model into one final pipeline model.
    Optionally save with a different quantization (32, 16, 8); works only on macOS.
    '''
    try:
        print('Combining the Core ML model with NMS and exporting the model')
        # Combine the models into a single pipeline
        pipeline = ct.models.pipeline.Pipeline(
            input_features=[
                ("image", ct.models.datatypes.Array(3, test_size[0], test_size[1])),
                ("iouThreshold", ct.models.datatypes.Double()),
                ("confidenceThreshold", ct.models.datatypes.Double()),
            ],
            output_features=["confidence", "coordinates"],
        )
        # A spec version >= iOS 13 is required for NMS to work
        pipeline.spec.specificationVersion = 6
        pipeline.add_model(modelSpec)
        pipeline.add_model(nmsSpec)
        pipeline.spec.description.input[0].ParseFromString(
            modelSpec.description.input[0].SerializeToString())
        pipeline.spec.description.output[0].ParseFromString(
            nmsSpec.description.output[0].SerializeToString())
        pipeline.spec.description.output[1].ParseFromString(
            nmsSpec.description.output[1].SerializeToString())

        # Metadata for the model
        pipeline.spec.description.input[1].shortDescription = \
            f"(optional) IOU threshold override (default: {iouThreshold})"
        pipeline.spec.description.input[2].shortDescription = \
            f"(optional) Confidence threshold override (default: {confidenceThreshold})"
        pipeline.spec.description.output[0].shortDescription = "Boxes × Class confidence"
        pipeline.spec.description.output[1].shortDescription = \
            "Boxes × [x, y, width, height] (relative to the image size)"
        pipeline.spec.description.metadata.versionString = "1.0"
        pipeline.spec.description.metadata.shortDescription = model_name
        pipeline.spec.description.metadata.author = ""
        pipeline.spec.description.metadata.license = ""

        model = ct.models.MLModel(pipeline.spec, weights_dir=mlmodel.weights_dir)
        model.save(fileName)
        print(f'CoreML export success, saved as {fileName}')
    except Exception as e:
        print(f'CoreML export failure: {e}')
(export_utils.py is truncated here; see the full file in the gist.)
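Because the NMS model exposes iouThreshold and confidenceThreshold as optional inputs, the defaults baked into the spec (0.6 and 0.1) can be overridden per prediction. A minimal sketch of running the exported pipeline from Python on macOS, assuming v9-s-x.mlpackage and a local dog.jpg:

import coremltools as ct
from PIL import Image

model = ct.models.MLModel("v9-s-x.mlpackage")
# The converter declared an ImageType input, so predict() accepts a PIL
# image; resize to the 640x640 shape the model was traced with.
img = Image.open("dog.jpg").resize((640, 640))
out = model.predict({
    "image": img,
    "iouThreshold": 0.6,          # same as the baked-in default
    "confidenceThreshold": 0.25,  # stricter than the default 0.1
})
print(out["confidence"].shape)   # (boxes, 80) per-class confidences
print(out["coordinates"].shape)  # (boxes, 4) normalized [x, y, w, h]

The Swift sample above passes no threshold values, so Vision falls back to the defaults stored in the model spec.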