This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Really awesome code taken from: http://apangborn.com/2011/05/pixel-processing-using-arm-assembly/
inline static void neon_rgba_to_bgra(unsigned char *src, unsigned char *dst, int numPixels) | |
{ | |
#ifdef __ARM_NEON__ | |
int simd_pixels = numPixels & ~7; // round down to nearest 8 | |
int simd_iterations = simd_pixels >> 3; | |
int col; | |
if(simd_iterations) { // make sure at least 1 iteration | |
__asm__ __volatile__ ("1: \n\t" | |
// structured load of 8 pixels into d0-d3 (64-bit) NEON registers |