Sample code to compute convolution output
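The sample below drives a single NEConvolutionLayer through the ARM Compute Library's NEON runtime: it sets up the memory managers, configures the layer for a 6x3x2 input and four 4x3x2 filters, fills the input with ones, loads the weights and biases from w.npy and b.npy via libnpy, runs the convolution, and dumps the result to out.npy so it can be checked against TensorFlow's tf.nn.conv2d.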
#include <iostream>
#include "include/libnpy/npy.hpp"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Allocator.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/PoolManager.h"
#include "utils/Utils.h"

using namespace arm_compute;
using namespace utils;
using namespace std;

class TestCNNExample : public Example
{
public:
    bool do_setup(int argc, char **argv) override
    {
        ARM_COMPUTE_UNUSED(argc);
        ARM_COMPUTE_UNUSED(argv);

        // Create memory manager components.
        // We need 2 memory managers: one for the tensors within the functions (mm_layers)
        // and one for the input and output tensors of the functions (mm_transitions).
        auto lifetime_mgr0  = std::make_shared<BlobLifetimeManager>();   // Lifetime managers
        auto lifetime_mgr1  = std::make_shared<BlobLifetimeManager>();
        auto pool_mgr0      = std::make_shared<PoolManager>();           // Pool managers
        auto pool_mgr1      = std::make_shared<PoolManager>();
        auto mm_layers      = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr0, pool_mgr0);
        auto mm_transitions = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr1, pool_mgr1);

        // The weights and biases tensors should be initialized with the values inferred during training.
        // Pass the memory manager so the layer is allowed to manage its internal memory requirements.
        conv0 = arm_compute::support::cpp14::make_unique<NEConvolutionLayer>(mm_layers);

        /*
         * From https://www.tensorflow.org/api_docs/python/tf/nn/conv2d
         * input  -> (1, 6, 3, 2)  [batch = 1, in_height = 6, in_width = 3, in_channels = 2]
         * filter -> (4, 3, 2, 4)  [filter_height = 4, filter_width = 3, in_channels = 2, out_channels = 4]
         * output -> (1, 6, 3, 4)  [height = 6, width = 3, channels = 4]
         */
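        /*
         * Note on shape ordering (clarification, based on ACL conventions): a TensorShape
         * lists dimensions fastest-first, i.e. (x = width, y = height, z = channels[, batch])
         * for the default NCHW data layout. The TF NHWC input (1, 6, 3, 2) above therefore
         * becomes src_shape(3, 6, 2) below, with the batch of 1 left implicit.
         */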
        // Initialize src tensor
        constexpr unsigned int width_src_image  = 3;
        constexpr unsigned int height_src_image = 6;
        constexpr unsigned int ifm_src_img      = 2;
        const TensorShape src_shape(width_src_image, height_src_image, ifm_src_img);
        src.allocator()->init(TensorInfo(src_shape, 1, DataType::F32));

        // Initialize tensors of conv0
        constexpr unsigned int kernel_x_conv0 = 3;
        constexpr unsigned int kernel_y_conv0 = 4;
        constexpr unsigned int ofm_conv0      = 4;
        // TF stores conv weights as HWIO; ACL with the default NCHW layout expects OIHW,
        // which as a TensorShape (fastest dimension first) is written (W, H, I, O)
        const TensorShape weights_shape_conv0(kernel_x_conv0, kernel_y_conv0, src_shape.z(), ofm_conv0);
        const TensorShape biases_shape_conv0(ofm_conv0);
        const TensorShape out_shape_conv0(src_shape.x(), src_shape.y(), weights_shape_conv0[3]);
        weights0.allocator()->init(TensorInfo(weights_shape_conv0, 1, DataType::F32));
        biases0.allocator()->init(TensorInfo(biases_shape_conv0, 1, DataType::F32));
        out_conv0.allocator()->init(TensorInfo(out_shape_conv0, 1, DataType::F32));
        /* -----------------------End: [Initialize tensors] */

        /* [Configure functions] */
        // PadStrideInfo(unsigned int stride_x, unsigned int stride_y, unsigned int pad_left,
        //               unsigned int pad_right, unsigned int pad_top, unsigned int pad_bottom,
        //               DimensionRoundingType round)
        conv0->configure(&src, &weights0, &biases0, &out_conv0,
                         PadStrideInfo(1 /* stride_x */, 1 /* stride_y */, 1 /* pad_left */, 1 /* pad_right */,
                                       1 /* pad_top */, 2 /* pad_bottom */, DimensionRoundingType::FLOOR));
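        // Worked check of the output size with the usual convolution formula:
        //   out_h = (in_h + pad_top + pad_bottom - kernel_h) / stride_y + 1 = (6 + 1 + 2 - 4) / 1 + 1 = 6
        //   out_w = (in_w + pad_left + pad_right - kernel_w) / stride_x + 1 = (3 + 1 + 1 - 3) / 1 + 1 = 3
        // This reproduces TF's SAME padding for these shapes, matching out_shape_conv0 = (3, 6, 4).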
        /* -----------------------End: [Configure functions] */

        /* [Add tensors to memory manager] */
        // We need 2 memory groups for handling the input and output
        // We call allocate() explicitly after manage() in order to avoid overlapping lifetimes
        memory_group0 = arm_compute::support::cpp14::make_unique<MemoryGroup>(mm_transitions);
        memory_group1 = arm_compute::support::cpp14::make_unique<MemoryGroup>(mm_transitions);
        memory_group0->manage(&out_conv0);
        out_conv0.allocator()->allocate();
        /* -----------------------End: [Add tensors to memory manager] */

        /* [Allocate tensors] */
        // Now that the padding requirements are known we can allocate all tensors
        src.allocator()->allocate();
        weights0.allocator()->allocate();
        biases0.allocator()->allocate();
        /* -----------------------End: [Allocate tensors] */

        // Populate the layers manager (validity checks, memory allocations, etc.)
        mm_layers->populate(allocator, 1 /* num_pools */);
        // Populate the transitions manager (validity checks, memory allocations, etc.)
        mm_transitions->populate(allocator, 2 /* num_pools */);
        return true;
    }

    void do_run() override
    {
        // Acquire memory for the memory groups
        memory_group0->acquire();
        memory_group1->acquire();

        // Fill the input tensor with ones
        {
            Window window;
            window.use_tensor_dimensions(src.info()->tensor_shape());
            execute_window_loop(window, [&](const Coordinates &id)
            {
                *reinterpret_cast<float *>(src.ptr_to_element(id)) = 1.0f;
            });
        }

        // Load the weights from w.npy and copy them into weights0
        {
            vector<unsigned long> shape;
            vector<float>         data;
            npy::LoadArrayFromNumpy("./build/examples/w.npy", shape, data);
std::cout << "weights shape contains:"; | |
for (std::vector<unsigned long>::iterator it = shape.begin() ; it != shape.end(); ++it) | |
std::cout << ' ' << *it; | |
std::cout << '\n'; | |
Window window; | |
window.use_tensor_dimensions(weights0.info()->tensor_shape()); | |
const DataLayout data_layout = weights0.info()->data_layout(); | |
const TensorShape tensor_shape = weights0.info()->tensor_shape(); | |
cout << "Num dims " << tensor_shape.num_dimensions() << std::endl; | |
for (int i = 0; i < tensor_shape.num_dimensions(); i++) | |
std::cout << " " << tensor_shape[i]; | |
std::cout << '\n'; | |
std::cout << " WEIGHT WIDTH " << window.x().end(); | |
std::cout << " HT " << window.y().end(); | |
std::cout << " CHANNEL " << window.z().end(); | |
std::cout << " BATCH " << window[3].end() << endl; | |
Iterator out(&weights0, window); | |
int count = 0; | |
execute_window_loop(window, [&](const Coordinates & id) | |
{ | |
std::cout << "( " << id[3] << ", " << id[2] << ", " << id[1] << ", " << id[0] << " ) " ; | |
std::cout << " - " << data[count] << endl; | |
*reinterpret_cast<float *>(out.ptr()) = static_cast<float>( data[count]); | |
count++; | |
}, out); | |
} | |
{ | |
vector<unsigned long> shape; | |
vector<float>data; | |
npy::LoadArrayFromNumpy("./build/examples/b.npy", shape, data); | |
std::cout << "bias shape contains:"; | |
for (std::vector<unsigned long>::iterator it = shape.begin() ; it != shape.end(); ++it) | |
std::cout << ' ' << *it; | |
std::cout << '\n'; | |
int count = 0; | |
Window window; | |
window.use_tensor_dimensions(biases0.info()->tensor_shape()); | |
const DataLayout data_layout = biases0.info()->data_layout(); | |
const TensorShape tensor_shape = biases0.info()->tensor_shape(); | |
cout << "Num dims " << tensor_shape.num_dimensions() << std::endl; | |
for (int i = 0; i < tensor_shape.num_dimensions(); i++) | |
std::cout << " " << tensor_shape[i]; | |
std::cout << '\n'; | |
std::cout << " BIAS WIDTH " << window.x().end(); | |
std::cout << " HT " << window.y().end(); | |
std::cout << " CHANNEL" << window.z().end(); | |
std::cout << " BATCH" << window[3].end() << endl; | |
Iterator out(&biases0, window); | |
execute_window_loop(window, [&](const Coordinates & id) | |
{ | |
std::cout << " B " << data[count] << std::endl; | |
*reinterpret_cast<float *>(out.ptr()) = static_cast<float>( data[count]); | |
count++; | |
}, out); | |
} | |
conv0->run(); | |
{ | |
Window window; | |
window.use_tensor_dimensions(out_conv0.info()->tensor_shape()); | |
const DataLayout data_layout = out_conv0.info()->data_layout(); | |
const TensorShape tensor_shape = out_conv0.info()->tensor_shape(); | |
cout << "max dims " << tensor_shape.num_dimensions() << std::endl; | |
for (int i = 0; i < tensor_shape.num_dimensions(); i++) | |
std::cout << " " << tensor_shape[i]; | |
std::cout << '\n'; | |
std::cout << " OUT WIDTH " << window.x().end(); | |
std::cout << " HT " << window.y().end(); | |
std::cout << " CHANNEL " << window.z().end(); | |
std::cout << " BATCH " << window[3].end() << endl; | |
unsigned long ww = window.x().end(); | |
unsigned long hh = window.y().end(); | |
unsigned long cc = window.z().end(); | |
unsigned long bb = window[3].end(); | |
//const unsigned long shape[] = {ww, hh, cc, bb}; | |
const unsigned long shape[] = {bb, cc, hh, ww}; | |
std::vector <float> data (shape[0] *shape[1] * shape[2] * shape[3]); | |
int count = 0; | |
execute_window_loop(window, [&](const Coordinates & id) | |
{ | |
data[count++] = *reinterpret_cast<float *>(out_conv0.ptr_to_element(id)); | |
}); | |
npy::SaveArrayAsNumpy( "./build/examples/out.npy", false, 4, shape, data); | |
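            // out.npy thus holds the result as (1, 4, 6, 3) in NCHW order. To compare it
            // with TF's NHWC output (1, 6, 3, 4), transpose with axes (0, 2, 3, 1),
            // e.g. np.transpose(out, (0, 2, 3, 1)) in NumPy.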
        }

        // Release memory
        memory_group0->release();
        memory_group1->release();
    }

private:
    // The src tensor should contain the input image
    Tensor src{};
    // Weights, biases and output tensors of the convolution
    Tensor weights0{};
    Tensor biases0{};
    Tensor out_conv0{};
    // NEON allocator
    Allocator allocator{};
    // Memory groups
    std::unique_ptr<MemoryGroup> memory_group0{};
    std::unique_ptr<MemoryGroup> memory_group1{};
    // Layers
    std::unique_ptr<NEConvolutionLayer> conv0{};
};

/** Main program for the CNN test
 *
 * The example implements the following CNN architecture:
 *
 * Input -> conv0
 *
 * @param[in] argc Number of arguments
 * @param[in] argv Arguments
 */
int main(int argc, char **argv)
{
    return utils::run_example<TestCNNExample>(argc, argv);
}
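Note on running the sample: the hard-coded paths assume it is built as an ACL example and launched from the source-tree root, with w.npy and b.npy already present in ./build/examples/. Those files are not part of the gist; one plausible way to produce them (an assumption, not something the gist specifies) is to save the TF filter as float32 transposed from HWIO to OIHW (NumPy axes (3, 2, 0, 1)) and the bias as a flat float32 array of length 4. The saved out.npy is in NCHW order, so transpose it to NHWC before comparing element-wise with tf.nn.conv2d.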