Created
July 28, 2022 02:45
-
-
Save fengyuentau/6a5a4946758e2640912ce6992cbaba4b to your computer and use it in GitHub Desktop.
Single operator inference with Ascend and OpenCV for input and output
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>  // std::remove / std::replace / std::min
#include <cstddef>
#include <cstdint>
#include <exception>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "acl/acl.h"
#include "acl/acl_op_compiler.h"
#include "acl/ops/acl_cblas.h"
#include "opencv2/imgproc.hpp"
// Extracts the value of the 'descr' entry from an NPY header dictionary.
//
// header: textual NPY header, e.g.
//         "{'descr': '<f4', 'fortran_order': False, 'shape': (1, 3), }"
// Returns the text between the first pair of single quotes that follows
// "'descr':" (e.g. "<f4"). Returns an empty string when the field, its
// opening quote or its closing quote cannot be found.
static std::string getType(const std::string& header)
{
    const std::string field = "'descr':";
    const std::string::size_type idx = header.find(field);
    if (idx == std::string::npos)
        return std::string();

    // The value is a quoted string; locate both delimiting quotes.
    const std::string::size_type open = header.find('\'', idx + field.size());
    if (open == std::string::npos)
        return std::string();
    const std::string::size_type from = open + 1;
    const std::string::size_type to = header.find('\'', from);
    if (to == std::string::npos)
        return std::string();

    return header.substr(from, to - from);
}
// Extracts the value of the 'fortran_order' entry from an NPY header
// dictionary.
//
// header: textual NPY header, e.g.
//         "{'descr': '<f4', 'fortran_order': False, 'shape': (1, 3), }"
// Returns the bare token that follows "'fortran_order':" (e.g. "False"),
// with surrounding spaces/tabs removed. Returns an empty string when the
// field is missing or has no value.
static std::string getFortranOrder(const std::string& header)
{
    const std::string field = "'fortran_order':";
    const std::string::size_type idx = header.find(field);
    if (idx == std::string::npos)
        return std::string();

    // Skip any amount of whitespace after the colon (including none) rather
    // than assuming exactly one space precedes the value.
    const std::string::size_type from = header.find_first_not_of(" \t", idx + field.size());
    if (from == std::string::npos)
        return std::string();

    // The value ends at the next entry separator or at the closing brace when
    // it is the last entry of the dictionary.
    const std::string::size_type to = header.find_first_of(",}", from);
    std::string value = header.substr(from, to == std::string::npos ? std::string::npos : to - from);

    // Drop trailing blanks such as in "False }".
    const std::string::size_type last = value.find_last_not_of(" \t");
    value.erase(last == std::string::npos ? 0 : last + 1);
    return value;
}
// Extracts the 'shape' tuple from an NPY header dictionary.
//
// header: textual NPY header, e.g.
//         "{'descr': '<f4', 'fortran_order': False, 'shape': (1, 3, 10, 10), }"
// Returns the dimensions in the order they are written. An empty tuple "()"
// yields a single dimension of size 1. Returns an empty vector when the field
// or its parentheses cannot be found.
static std::vector<int> getShape(const std::string& header)
{
    const std::string field = "'shape':";
    const std::string::size_type idx = header.find(field);
    if (idx == std::string::npos)
        return std::vector<int>();

    const std::string::size_type open = header.find('(', idx + field.size());
    if (open == std::string::npos)
        return std::vector<int>();
    const std::string::size_type close = header.find(')', open + 1);
    if (close == std::string::npos)
        return std::vector<int>();

    std::string shapeStr = header.substr(open + 1, close - open - 1);

    // Turn commas into separators instead of deleting them: deleting would
    // glue "1,3,10,10" (no spaces) into the single number 131010.
    std::replace(shapeStr.begin(), shapeStr.end(), ',', ' ');

    // "()" (optionally with blanks) is an empty tuple.
    if (shapeStr.find_first_not_of(" \t") == std::string::npos)
        return std::vector<int>(1, 1);

    std::istringstream ss(shapeStr);
    std::vector<int> shape;
    int value = 0;
    while (ss >> value)
    {
        shape.push_back(value);
    }
    return shape;
}
// Loads a little-endian float32, C-ordered NumPy .npy file into a cv::Mat.
//
// path: location of the .npy file.
// Returns a CV_32F matrix whose sizes are the 'shape' tuple of the file.
// Raises (through CV_Assert) when the file cannot be opened, is not an NPY
// file, uses an unknown format version, does not store '<f4' data in C order,
// or is shorter than its header announces.
cv::Mat blobFromNPY(const std::string& path)
{
    std::ifstream ifs(path.c_str(), std::ios::binary);
    CV_Assert(ifs.is_open());

    // Magic string.
    std::string magic(6, '*');
    ifs.read(&magic[0], static_cast<std::streamsize>(magic.size()));
    CV_Assert(ifs.gcount() == static_cast<std::streamsize>(magic.size()));
    CV_Assert(magic == "\x93NUMPY");

    // Format version: major, minor.
    unsigned char version[2] = {0, 0};
    ifs.read(reinterpret_cast<char*>(version), 2);
    CV_Assert(ifs.gcount() == 2);
    CV_Assert(version[0] >= 1 && version[0] <= 3);

    // Header length is a little-endian integer: 2 bytes in format 1.x,
    // 4 bytes in 2.x and 3.x. Assemble it byte by byte so the result does not
    // depend on the host byte order.
    const int lengthBytes = (version[0] == 1) ? 2 : 4;
    unsigned char lengthField[4] = {0, 0, 0, 0};
    ifs.read(reinterpret_cast<char*>(lengthField), lengthBytes);
    CV_Assert(ifs.gcount() == lengthBytes);
    size_t headerSize = 0;
    for (int i = lengthBytes - 1; i >= 0; --i)
        headerSize = (headerSize << 8) | lengthField[i];
    CV_Assert(headerSize > 0);

    std::string header(headerSize, '*');
    ifs.read(&header[0], static_cast<std::streamsize>(header.size()));
    CV_Assert(static_cast<size_t>(ifs.gcount()) == header.size());

    // The payload is copied verbatim into a CV_32F matrix, so only
    // little-endian float32 data in C order is acceptable.
    CV_Assert(getType(header) == "<f4");
    CV_Assert(getFortranOrder(header) == "False");

    const std::vector<int> shape = getShape(header);
    CV_Assert(!shape.empty());
    for (size_t i = 0; i < shape.size(); ++i)
        CV_Assert(shape[i] >= 0);

    cv::Mat blob(shape, CV_32F);
    const size_t payloadSize = blob.total() * blob.elemSize();
    if (payloadSize > 0)
    {
        ifs.read(reinterpret_cast<char*>(blob.data), static_cast<std::streamsize>(payloadSize));
        CV_Assert(static_cast<size_t>(ifs.gcount()) == payloadSize);
    }
    return blob;
}
// Prints the leading values of a matrix, reinterpreted as floats, on one line
// separated by spaces and terminated by a newline.
//
// m:   matrix to print; its buffer is read linearly as float values.
//      NOTE(review): this presumes a continuous CV_32F matrix - confirm at
//      call sites.
// end: number of values requested. It is clamped to the number of floats the
//      matrix buffer holds, so an over-large request no longer reads past the
//      end of the data; non-positive values print only the newline.
void printBlob(const cv::Mat& m, int end)
{
    const size_t available = m.total() * m.elemSize() / sizeof(float);
    const size_t count = (end > 0) ? std::min(static_cast<size_t>(end), available) : 0;

    const float* mptr = reinterpret_cast<const float*>(m.data);
    for (size_t i = 0; i < count; ++i)
        std::cout << mptr[i] << " ";
    std::cout << std::endl;
}
// Conv2D | |
// * input shape [1, 3, 10, 10] | |
// * kernel shape [5, 3, 5, 5] | |
// * output shape [1, 5, 4, 4] | |
int main() | |
{ | |
// Ascend resource initialization | |
// * init ascend | |
aclInit(NULL); | |
// * set device | |
int deviceID = 0; | |
aclrtSetDevice(deviceID); | |
// * create context | |
aclrtContext context = nullptr; | |
aclrtCreateContext(&context, deviceID); | |
// * create stream | |
aclrtStream stream = nullptr; | |
aclrtCreateStream(&stream); | |
int ret; | |
// Inputs | |
// * get input pointer | |
std::vector<int64_t> shape = {1, 3, 10, 10}; | |
size_t inputSizeInByte = sizeof(float) * 1 * 3 * 10 * 10; | |
cv::Mat inputMat = blobFromNPY("./input_convolution.npy"); | |
//printBlob(inputMat, 300); | |
const void* inputOnHost = (const void*)inputMat.data; | |
// * alloc buffer for input on device | |
void* inputOnDevice = nullptr; | |
ret = aclrtMalloc(&inputOnDevice, inputSizeInByte, ACL_MEM_MALLOC_NORMAL_ONLY); | |
std::cout << "inputOnDevice malloc status: " << ret << std::endl; | |
// * send the input data from host to device | |
ret = aclrtMemcpy(inputOnDevice, inputSizeInByte, inputOnHost, inputSizeInByte, ACL_MEMCPY_HOST_TO_DEVICE); | |
std::cout << "inputOnDevice memcpy status: " << ret << std::endl; | |
// Model parameters | |
// * w | |
std::vector<int64_t> w_shape = {5, 3, 5, 5}; | |
size_t wSizeInByte = sizeof(float) * 5 * 3 * 5 * 5; | |
cv::Mat wMat = blobFromNPY("./convolution_w.npy"); | |
const void* wOnHost = (const void*)wMat.data; | |
//printBlob(wMat, 25); | |
void* wOnDevice = nullptr; | |
ret = aclrtMalloc(&wOnDevice, wSizeInByte, ACL_MEM_MALLOC_NORMAL_ONLY); | |
std::cout << "wOnDevice malloc status: " << ret << std::endl; | |
ret = aclrtMemcpy(wOnDevice, wSizeInByte, wOnHost, wSizeInByte, ACL_MEMCPY_HOST_TO_DEVICE); | |
std::cout << "wOnDevice memcpy status: " << ret << std::endl; | |
// * b | |
std::vector<int64_t> b_shape = {5}; | |
size_t bSizeInByte = sizeof(float) * 5; | |
cv::Mat bMat = blobFromNPY("./convolution_b.npy"); | |
//printBlob(bMat, 5); | |
const void* bOnHost = (const void*)bMat.data; | |
void* bOnDeivce = nullptr; | |
ret = aclrtMalloc(&bOnDeivce, bSizeInByte, ACL_MEM_MALLOC_NORMAL_ONLY); | |
std::cout << "bOnDevice malloc status: " << ret << std::endl; | |
ret = aclrtMemcpy(bOnDeivce, bSizeInByte, bOnHost, bSizeInByte, ACL_MEMCPY_HOST_TO_DEVICE); | |
std::cout << "bOnDevice memcpy status: " << ret << std::endl; | |
// Model output | |
// * get output shape | |
std::vector<int64_t> output_shape = {1, 5, 4, 4}; | |
size_t outputSizeInByte = sizeof(float) * 1 * 5 * 4 * 4; | |
// * alloc buffer for output on device | |
void* outputOnDevice = nullptr; | |
ret = aclrtMalloc(&outputOnDevice, outputSizeInByte, ACL_MEM_MALLOC_HUGE_FIRST); | |
std::cout << "outputonDevice malloc status: " << ret << std::endl; | |
// Create model | |
std::string opName("Conv2D"); | |
// * set attr, stides, pads, dilations | |
aclopAttr* opAttr = aclopCreateAttr(); | |
std::vector<int64_t> stridesValue = {1, 1, 2, 2}; // strides | |
ret = aclopSetAttrListInt(opAttr, "strides", stridesValue.size(), stridesValue.data()); | |
std::cout << "attr set strides: " << ret << std::endl; | |
std::vector<int64_t> padsValue = {1, 1, 1, 1}; // pads | |
ret = aclopSetAttrListInt(opAttr, "pads", padsValue.size(), padsValue.data()); | |
std::cout << "attr set pads: " << ret << std::endl; | |
std::vector<int64_t> dilationsValue = {1, 1, 1, 1}; // dilations | |
ret = aclopSetAttrListInt(opAttr, "dilations", dilationsValue.size(), dilationsValue.data()); | |
std::cout << "attr set dilations: " << ret << std::endl; | |
int groups = 1; | |
ret = aclopSetAttrInt(opAttr, "groups", groups); | |
std::cout << "attr set groups: " << ret << std::endl; | |
int offset_x = 0; | |
ret = aclopSetAttrInt(opAttr, "offset_x", offset_x); | |
std::cout << "attr set offset_x: " << ret << std::endl; | |
// * set inputTensor (description) | |
std::vector<aclTensorDesc*> inputTensorDesc; | |
inputTensorDesc.push_back(aclCreateTensorDesc(ACL_FLOAT, // ACL data type | |
shape.size(), // num of dim | |
shape.data(), // dims | |
ACL_FORMAT_NCHW)); // ACL tensor format | |
inputTensorDesc.push_back(aclCreateTensorDesc(ACL_FLOAT, | |
w_shape.size(), | |
w_shape.data(), | |
ACL_FORMAT_NCHW)); | |
inputTensorDesc.push_back(aclCreateTensorDesc(ACL_FLOAT, | |
b_shape.size(), | |
b_shape.data(), | |
ACL_FORMAT_ND)); | |
// * set outputTensor (description), similar above | |
std::vector<aclTensorDesc*> outputTensorDesc; | |
outputTensorDesc.push_back(aclCreateTensorDesc(ACL_FLOAT, | |
output_shape.size(), | |
output_shape.data(), | |
ACL_FORMAT_NCHW)); | |
// Inference | |
// * create data buffer for input | |
std::vector<aclDataBuffer*> inputBuffers; | |
inputBuffers.push_back(aclCreateDataBuffer(inputOnDevice, inputSizeInByte)); | |
inputBuffers.push_back(aclCreateDataBuffer(wOnDevice, wSizeInByte)); | |
inputBuffers.push_back(aclCreateDataBuffer(bOnDeivce, bSizeInByte)); | |
// * create data buffer for output | |
std::vector<aclDataBuffer*> outputBuffers; | |
outputBuffers.push_back(aclCreateDataBuffer(outputOnDevice, outputSizeInByte)); | |
// * forward: call aclopExecute() | |
ret = aclopCompileAndExecute(opName.c_str(), | |
inputTensorDesc.size(), inputTensorDesc.data(), inputBuffers.data(), | |
outputTensorDesc.size(), outputTensorDesc.data(), outputBuffers.data(), | |
opAttr, ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL, stream); | |
std::cout << "op execute: " << ret << std::endl; | |
// * synchronize stream | |
aclrtSynchronizeStream(stream); | |
// Get output - move from device to host | |
// * send the output data from device to host | |
void* outputOnHost = nullptr; | |
aclrtMallocHost(&outputOnHost, outputSizeInByte); | |
aclrtMemcpy(outputOnHost, outputSizeInByte, outputOnDevice, outputSizeInByte, ACL_MEMCPY_DEVICE_TO_HOST); | |
// * construct outputMat | |
std::vector<int> output_shape_int = {1, 5, 4, 4}; | |
cv::Mat tmp(output_shape_int, CV_32FC1, outputOnHost); | |
cv::Mat outputMat; | |
tmp.copyTo(outputMat); | |
std::cout << outputMat.size << std::endl; | |
printBlob(outputMat, 5*4*4); | |
// * write to file | |
//ofstream outstr("res.out", ios::out | ios::binary); | |
//outstr.write((char*) | |
// Release Ascend resource | |
// * release stream | |
aclrtDestroyStream(stream); | |
stream = nullptr; | |
// * release context | |
aclrtDestroyContext(context); | |
context = nullptr; | |
// * reset device | |
aclrtResetDevice(deviceID); | |
// * de-init ascend | |
aclFinalize(); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the single-operator Conv2D sample (ascend_conv2d.cpp) against OpenCV
# and the Ascend Computing Language (ACL) libraries.
#
# Environment variables read at configure time:
#   ASCEND_INSTALL_DIR - root of the Ascend toolkit installation (required)
#   ASCEND_DRIVER_DIR  - root of the Ascend driver; its lib64 directory is
#                        added to the link search path (optional)
cmake_minimum_required(VERSION 3.16.3)
project(ascend-conv2d)

# Find OpenCV
find_package(OpenCV 4.5.4 REQUIRED)

# Find Ascend
# Fail early with a clear message: with the variable unset, every path below
# would silently be resolved relative to the filesystem root.
if("$ENV{ASCEND_INSTALL_DIR}" STREQUAL "")
  message(FATAL_ERROR "Environment variable ASCEND_INSTALL_DIR is not set")
endif()
set(ASCEND_INSTALL_DIR "$ENV{ASCEND_INSTALL_DIR}")
set(ASCEND_INCLUDE_DIR "${ASCEND_INSTALL_DIR}/include")

#set(ASCEND_LIBRARY_ASCENDCL "${ASCEND_INSTALL_DIR}/acllib/lib64/libascendcl.so")
find_library(ASCEND_LIBRARY_ASCENDCL NAMES ascendcl PATHS "${ASCEND_INSTALL_DIR}/acllib/lib64" NO_DEFAULT_PATH)
find_library(ASCEND_LIBRARY_ACLOPCOMPILER NAMES acl_op_compiler PATHS "${ASCEND_INSTALL_DIR}/compiler/lib64" NO_DEFAULT_PATH)
# find_library(... REQUIRED) needs CMake 3.18, so check the results by hand.
if(NOT ASCEND_LIBRARY_ASCENDCL)
  message(FATAL_ERROR "libascendcl not found in ${ASCEND_INSTALL_DIR}/acllib/lib64")
endif()
if(NOT ASCEND_LIBRARY_ACLOPCOMPILER)
  message(FATAL_ERROR "libacl_op_compiler not found in ${ASCEND_INSTALL_DIR}/compiler/lib64")
endif()

add_executable(conv2d ./ascend_conv2d.cpp)
target_include_directories(conv2d PRIVATE ${OpenCV_INCLUDE_DIRS} "${ASCEND_INCLUDE_DIR}")

if("$ENV{ASCEND_DRIVER_DIR}" STREQUAL "")
  message(WARNING "Environment variable ASCEND_DRIVER_DIR is not set; driver libraries are not added to the link search path")
else()
  set(ASCEND_DRIVER_DIR "$ENV{ASCEND_DRIVER_DIR}/lib64")
  target_link_directories(conv2d PRIVATE "${ASCEND_DRIVER_DIR}")
endif()

target_link_libraries(conv2d ${OpenCV_LIBS} ${ASCEND_LIBRARY_ASCENDCL} ${ASCEND_LIBRARY_ACLOPCOMPILER})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The .npy files can be found at https://github.com/opencv/opencv_extra/tree/4.x/testdata/dnn/onnx. The .npy files for the weights and the bias were extracted from convolution.npy.