Last active
May 8, 2020 09:19
-
-
Save YashasSamaga/71157cf0c3768c497e5e70fb95435596 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <algorithm> | |
#include <fstream> | |
#include <vector> | |
#include <chrono> | |
#include <opencv2/dnn.hpp> | |
#include <opencv2/highgui.hpp> | |
#include "benchmark.hpp" | |
// When true, benchmark only the CUDA backend through the asynchronous
// forwardAsync() API; when false, benchmark every entry in `backends`
// with synchronous forward() calls.
constexpr bool async = false;
// Number of images packed into one input blob (the batch dimension).
constexpr auto default_batch_size = 1;
// Uncomment to benchmark with randomly generated images instead of the
// files under data/images/.
//#define USE_RANDOM_IMAGES
// Backend/target pair used to exclude a configuration from a benchmark run.
// By convention (see bench_network_sync), -1 in either field acts as a
// wildcard that matches any backend or any target.
struct mask_type {
    int backend;  // a cv::dnn::DNN_BACKEND_* value, or -1 for "any backend"
    int target;   // a cv::dnn::DNN_TARGET_* value, or -1 for "any target"
};
// A human-readable label plus the backend/target pair it names; the global
// `backends` table below is a list of these.
struct config_type {
    std::string name;  // label printed next to the timing result
    int backend;       // cv::dnn::DNN_BACKEND_* value
    int target;        // cv::dnn::DNN_TARGET_* value
};
// Every backend/target combination that bench_network_sync() iterates over.
// Individual entries can be skipped per-model via the `mask` argument.
std::vector<config_type> backends = {
    {"OCV CPU", cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_TARGET_CPU},
    {"OCV OpenCL", cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_TARGET_OPENCL},
    {"OCV OpenCL FP16", cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_TARGET_OPENCL_FP16},
    {"IE CPU", cv::dnn::DNN_BACKEND_INFERENCE_ENGINE, cv::dnn::DNN_TARGET_CPU},
    {"CUDA FP32", cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA},
    {"CUDA FP16", cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA_FP16}
};
// Sample images loaded once in main() and reused to build input blobs.
std::vector<cv::Mat> image_samples;
/* Converts any std::chrono duration to milliseconds (truncating). */
template <class T>
auto to_milliseconds(const T& duration) {
    using result_type = std::chrono::milliseconds;
    return std::chrono::duration_cast<result_type>(duration);
}
/* Converts any std::chrono duration to microseconds (truncating). */
template <class T>
auto to_microseconds(const T& duration) {
    using result_type = std::chrono::microseconds;
    return std::chrono::duration_cast<result_type>(duration);
}
template <std::size_t BenchmarkTrials = 10, std::size_t WarmupTrials = 3> | |
auto run_network(const std::string& model, const std::string& config, int backend, int target, const cv::Mat& blob) | |
{ | |
auto net = cv::dnn::readNet(model, config); | |
net.setPreferableBackend(backend); | |
net.setPreferableTarget(target); | |
for(int i = 0; i < WarmupTrials; i++) | |
{ | |
net.setInput(blob); | |
net.forward(); | |
} | |
return benchmark([&] { | |
for(int i = 0; i < BenchmarkTrials; i++) | |
{ | |
net.setInput(blob); | |
net.forward(); | |
} | |
}); | |
} | |
template <std::size_t BenchmarkTrials = 10, std::size_t WarmupTrials = 3> | |
auto run_network_async(const std::string& model, const std::string& config, int backend, int target, const cv::Mat& blob) | |
{ | |
auto net = cv::dnn::readNet(model, config); | |
net.setPreferableBackend(backend); | |
net.setPreferableTarget(target); | |
for(int i = 0; i < WarmupTrials; i++) | |
{ | |
net.setInput(blob); | |
net.forward(); | |
} | |
auto waste = benchmark([] { }); | |
using duration_type = decltype(waste); | |
duration_type init_time{}, wait_time{}; | |
cv::AsyncArray asyncOutput; | |
cv::Mat output; | |
for(int i = 0; i < BenchmarkTrials; i++) | |
{ | |
init_time += benchmark([&] { | |
net.setInput(blob); | |
asyncOutput = net.forwardAsync(); | |
}); | |
wait_time += benchmark([&] { | |
asyncOutput.get(output); | |
}); | |
} | |
return std::pair<decltype(init_time), decltype(wait_time)>(init_time, wait_time); | |
} | |
void bench_network_async(const std::string& model, const std::string& config, cv::Size input_size, int count = default_batch_size, std::vector<mask_type> mask = {}) { | |
#ifndef USE_RANDOM_IMAGES | |
assert(count <= image_samples.size()); | |
#endif | |
std::vector<cv::Mat> images; | |
for (int i = 0; i < count; i++) | |
{ | |
#ifdef USE_RANDOM_IMAGES | |
cv::Mat image(input_size, type); | |
cv::randu(image, cv::Scalar(0, 0, 0), cv::Scalar(255, 255, 255)); | |
images.push_back(image); | |
#else | |
images.push_back(image_samples[i]); | |
#endif | |
} | |
cv::Mat blob = cv::dnn::blobFromImages(images, 1.0f, input_size, 0.0f); | |
constexpr int N = 10; | |
auto time = run_network_async<N>(model, config, cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA, blob); | |
float average_init = to_microseconds(time.first).count() / 1000.0 / N; | |
float average_wait = to_microseconds(time.second).count() / 1000.0 / N; | |
std::cout << "Average Initialization Time: " << average_init << "ms" << std::endl; | |
std::cout << "Average Waiting Time: " << average_wait << "ms" << std::endl; | |
std::cout << std::endl; | |
} | |
void bench_network_sync(const std::string& model, const std::string& config, cv::Size input_size, int count = default_batch_size, std::vector<mask_type> mask = {}) { | |
#ifndef USE_RANDOM_IMAGES | |
assert(count <= image_samples.size()); | |
#endif | |
std::vector<cv::Mat> images; | |
for (int i = 0; i < count; i++) | |
{ | |
#ifdef USE_RANDOM_IMAGES | |
cv::Mat image(input_size, type); | |
cv::randu(image, cv::Scalar(0, 0, 0), cv::Scalar(255, 255, 255)); | |
images.push_back(image); | |
#else | |
images.push_back(image_samples[i]); | |
#endif | |
} | |
cv::Mat blob = cv::dnn::blobFromImages(images, 1.0f, input_size, 0.0f); | |
for(auto c : backends) { | |
auto backend = c.backend; | |
auto target = c.target; | |
bool masked = false; | |
for(auto m : mask) { | |
if(m.backend == backend && target == target) | |
masked = true; | |
if(m.backend == backend && m.target == -1) | |
masked = true; | |
if(m.backend == -1 && m.target == target) | |
masked = true; | |
} | |
if(masked) | |
continue; | |
constexpr int N = 10; | |
auto time = run_network<N>(model, config, backend, target, blob); | |
float average = to_microseconds(time).count() / 1000.0 / N; | |
std::cout << c.name << ": " << average << "ms" << std::endl; | |
} | |
std::cout << std::endl; | |
} | |
void bench_network(const std::string& model, const std::string& config, cv::Size input_size, int count = default_batch_size, std::vector<mask_type> mask = {}) { | |
if(async) | |
bench_network_async(model, config, input_size, count, mask); | |
else | |
bench_network_sync(model, config, input_size, count, mask); | |
} | |
void bench_alexnet() | |
{ | |
std::cout << "BVLC AlexNet\n"; | |
bench_network("data/alexnet/deploy.prototxt", "data/alexnet/bvlc_alexnet.caffemodel", cv::Size(227, 227)); | |
std::cout << std::endl; | |
} | |
void bench_googlenet() | |
{ | |
std::cout << "BVLC GoogleNet\n"; | |
bench_network("data/googlenet/deploy.prototxt", "data/googlenet/bvlc_googlenet.caffemodel", cv::Size(224, 224)); | |
std::cout << std::endl; | |
} | |
void bench_resnet50() | |
{ | |
std::cout << "ResNet 50\n"; | |
bench_network("data/resnet50/ResNet-50-deploy.prototxt", "data/resnet50/ResNet-50-model.caffemodel", cv::Size(224, 224)); | |
std::cout << std::endl; | |
} | |
void bench_squeezenet() | |
{ | |
std::cout << "SqueezeNet v1.1\n"; | |
bench_network("data/squeezenet/squeezenet_v1.1.prototxt", "data/squeezenet/squeezenet_v1.1.caffemodel", cv::Size(227, 227)); | |
std::cout << std::endl; | |
} | |
void bench_tensorflow_inception_5h() | |
{ | |
std::cout << "TensorFlow Inception 5h\n"; | |
bench_network("data/tensorflow_inception_5h/tensorflow_inception_graph.pb", "", cv::Size(224, 224)); | |
std::cout << std::endl; | |
} | |
void bench_vgg16() | |
{ | |
std::cout << "VGG16 SSD\n"; | |
bench_network("data/vgg16/ssd_vgg16.prototxt", "data/vgg16/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", cv::Size(300, 300)); | |
std::cout << std::endl; | |
} | |
void bench_enet() | |
{ | |
std::cout << "ENet Cityscapes\n"; | |
bench_network("data/enet/model-cityscapes.net", "", cv::Size(512, 256), 1, | |
{ | |
{cv::dnn::DNN_BACKEND_INFERENCE_ENGINE, -1}, | |
// {-1, cv::dnn::DNN_TARGET_OPENCL_FP16} | |
}); | |
std::cout << std::endl; | |
} | |
void bench_openface_nn4_small2_v1() | |
{ | |
std::cout << "OpenFace nn4 small2 v1\n"; | |
bench_network("data/openface/nn4.small2.v1.t7", "", cv::Size(96, 96)); | |
std::cout << std::endl; | |
} | |
void bench_mobilenet_ssd() | |
{ | |
std::cout << "MobileNet SSD\n"; | |
bench_network("data/mobilenet_ssd/MobileNetSSD_deploy.prototxt", "data/mobilenet_ssd/MobileNetSSD_deploy.caffemodel", cv::Size(300, 300)); | |
std::cout << std::endl; | |
} | |
void bench_mobilenet_ssd_v1_coco() | |
{ | |
std::cout << "MobileNet SSD Coco v1\n"; | |
bench_network("data/mobilenet_ssd_v1_coco_2017_11_17/ssd_mobilenet_v1_coco_2017_11_17.pb", "data/mobilenet_ssd_v1_coco_2017_11_17/ssd_mobilenet_v1_coco_2017_11_17.pbtxt", cv::Size(300, 300)); | |
std::cout << std::endl; | |
} | |
void bench_mobilenet_ssd_v2_coco() | |
{ | |
std::cout << "MobileNet SSD Coco v2\n"; | |
bench_network("data/mobilenet_ssd_v2_coco_2018_03_29/ssd_mobilenet_v2_coco_2018_03_29.pb", "data/mobilenet_ssd_v2_coco_2018_03_29/ssd_mobilenet_v2_coco_2018_03_29.pbtxt", cv::Size(300, 300)); | |
std::cout << std::endl; | |
} | |
void bench_densenet121() | |
{ | |
std::cout << "DenseNet 121\n"; | |
bench_network("data/densenet121/DenseNet_121.prototxt", "data/densenet121/DenseNet_121.caffemodel", cv::Size(224, 224)); | |
std::cout << std::endl; | |
} | |
void bench_openpose_pose_mpi() | |
{ | |
std::cout << "OpenPose pose MPI\n"; | |
bench_network("data/openpose_pose_mpi/openpose_pose_mpi_faster_4_stages.prototxt", "data/openpose_pose_mpi/pose_iter_160000.caffemodel", cv::Size(368, 368)); | |
std::cout << std::endl; | |
} | |
void bench_opencv_face_detector() | |
{ | |
std::cout << "OpenCV Face Detector\n"; | |
bench_network("data/opencv_face_detector/deploy.prototxt", "data/opencv_face_detector/res10_300x300_ssd_iter_140000_fp16.caffemodel", cv::Size(300, 300)); | |
std::cout << std::endl; | |
} | |
void bench_inception_v2_coco() | |
{ | |
std::cout << "Inception v2 Coco\n"; | |
bench_network("data/ssd_inception_v2_coco_2017_11_17/ssd_inception_v2_coco_2017_11_17.pb", "data/ssd_inception_v2_coco_2017_11_17/ssd_inception_v2_coco_2017_11_17.pbtxt", cv::Size(300, 300)); | |
std::cout << std::endl; | |
} | |
void bench_yolo_v3() | |
{ | |
std::cout << "YOLO v3\n"; | |
bench_network("data/yolov3/yolov3.cfg", "data/yolov3/yolov3.weights", cv::Size(416, 416)); | |
std::cout << std::endl; | |
} | |
void bench_EAST_text_detection() | |
{ | |
std::cout << "EAST Text Detection\n"; | |
bench_network("data/east_text_detection/frozen_east_text_detection.pb", "", cv::Size(320, 320)); | |
std::cout << std::endl; | |
} | |
void bench_fast_neural_style_sn() | |
{ | |
std::cout << "FastNeuralStyle Stary Night\n"; | |
bench_network("data/fns_stary_night/fast_neural_style_eccv16_starry_night.t7", "", cv::Size(320, 240)); | |
std::cout << std::endl; | |
} | |
void bench_inception_v2_faster_rcnn() | |
{ | |
std::cout << "Inception v2 Faster RCNN\n"; | |
bench_network("data/inception_v2_faster_rcnn/faster_rcnn_inception_v2_coco_2018_01_28.pb", "data/inception_v2_faster_rcnn/faster_rcnn_inception_v2_coco_2018_01_28.pbtxt", cv::Size(800, 600), default_batch_size, | |
{ | |
{cv::dnn::DNN_BACKEND_INFERENCE_ENGINE, -1} | |
}); | |
std::cout << std::endl; | |
} | |
void bench_yolo_v2() | |
{ | |
std::cout << "YOLO v2\n"; | |
bench_network("data/yolov2/yolov2.cfg", "data/yolov2/yolov2.weights", cv::Size(608, 608)); | |
std::cout << std::endl; | |
} | |
int main(int argc, char *argv[]) { | |
constexpr auto total_images = 10; | |
auto prefix = std::string("data/images/img_"), | |
suffix = std::string(".jpg"); | |
/* populate sample images */ | |
for (int i = 0; i < total_images; i++) { | |
auto file = prefix + std::to_string(i) + suffix; | |
auto image = cv::imread(file); | |
image_samples.push_back(image); | |
} | |
bench_enet(); | |
if(async) | |
{ | |
bench_alexnet(); | |
bench_googlenet(); | |
bench_resnet50(); | |
bench_squeezenet(); | |
bench_tensorflow_inception_5h(); | |
bench_enet(); | |
bench_openface_nn4_small2_v1(); | |
bench_densenet121(); | |
bench_openpose_pose_mpi(); | |
bench_EAST_text_detection(); | |
bench_fast_neural_style_sn(); | |
} | |
else | |
{ | |
bench_alexnet(); | |
bench_googlenet(); | |
bench_resnet50(); | |
bench_squeezenet(); | |
bench_tensorflow_inception_5h(); | |
bench_vgg16(); | |
bench_enet(); | |
bench_openface_nn4_small2_v1(); | |
bench_mobilenet_ssd(); | |
//bench_mobilenet_ssd_v1_coco(); | |
//bench_mobilenet_ssd_v2_coco(); | |
bench_densenet121(); | |
bench_openpose_pose_mpi(); | |
bench_opencv_face_detector(); | |
bench_inception_v2_coco(); | |
bench_yolo_v3(); | |
bench_yolo_v2(); | |
bench_EAST_text_detection(); | |
bench_fast_neural_style_sn(); | |
bench_inception_v2_faster_rcnn(); | |
} | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef BENCHMARK_HPP | |
#define BENCHMARK_HPP | |
#include <chrono>
#include <utility>
/* Times a single invocation of function(args...) on a monotonic clock.
 *
 * function  any callable; taken by forwarding reference so functors are no
 *           longer copied on every call (the previous by-value parameter
 *           copied the callable and its captures)
 * args      forwarded unchanged to the callable
 * returns   the elapsed time as a std::chrono::steady_clock::duration
 */
template <class Function, typename ...Args>
auto benchmark(Function&& function, Args&& ...args) {
    using std::chrono::steady_clock;
    const auto start = steady_clock::now();
    std::forward<Function>(function)(std::forward<Args>(args)...);
    const auto end = steady_clock::now();
    return end - start;
}
/* doNotOptimizeAway from https://stackoverflow.com/a/36781982/1935009 */
/* Prevents the compiler from eliminating a computed value as dead code in
 * micro-benchmarks. Three compiler-specific implementations: */
#ifdef _MSC_VER
/* MSVC: disable optimization around a self-assignment so the value is kept */
#pragma optimize("", off)
template <class T>
void doNotOptimizeAway(T&& datum) {
    datum = datum;
}
#pragma optimize("", on)
#elif defined(__clang__)
/* clang: an optnone function call is itself enough to keep the argument */
template <class T>
__attribute__((__optnone__)) void doNotOptimizeAway(T&& /* datum */) {}
#else
/* gcc and others: an empty asm with the value as an in/out operand forces the
 * compiler to materialize it */
template <class T>
void doNotOptimizeAway(T&& datum) {
    asm volatile("" : "+r" (datum));
}
#endif
#endif /* BENCHMARK_HPP */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment