Created
March 4, 2015 01:28
-
-
Save ijsf/0d61f27ec065ca601e8d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* autogenerated from gstaudiopack.orc */ | |
#ifdef HAVE_CONFIG_H | |
#include "config.h" | |
#endif | |
#include <glib.h> | |
#ifndef _ORC_INTEGER_TYPEDEFS_
#define _ORC_INTEGER_TYPEDEFS_
/*
 * Fixed-width integer aliases (orc_int8 ... orc_uint64), the ORC_UINT64_C
 * literal helper and the orc_union16/32/64 punning unions.
 * The whole group is guarded so that it is only defined once per
 * translation unit.
 */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 or later: take the exact-width types straight from <stdint.h>. */
#include <stdint.h>
typedef int8_t orc_int8;
typedef int16_t orc_int16;
typedef int32_t orc_int32;
typedef int64_t orc_int64;
typedef uint8_t orc_uint8;
typedef uint16_t orc_uint16;
typedef uint32_t orc_uint32;
typedef uint64_t orc_uint64;
#define ORC_UINT64_C(x) UINT64_C(x)
#elif defined(_MSC_VER)
/* MSVC: use the compiler's sized __intN types and its Ui64 suffix. */
typedef signed __int8 orc_int8;
typedef signed __int16 orc_int16;
typedef signed __int32 orc_int32;
typedef signed __int64 orc_int64;
typedef unsigned __int8 orc_uint8;
typedef unsigned __int16 orc_uint16;
typedef unsigned __int32 orc_uint32;
typedef unsigned __int64 orc_uint64;
#define ORC_UINT64_C(x) (x##Ui64)
#define inline __inline
#else
/* Anything else: map onto the basic C types. */
#include <limits.h>
typedef signed char orc_int8;
typedef short orc_int16;
typedef int orc_int32;
typedef unsigned char orc_uint8;
typedef unsigned short orc_uint16;
typedef unsigned int orc_uint32;
#if INT_MAX == LONG_MAX
/* long is no wider than int here, so the 64-bit type is long long. */
typedef long long orc_int64;
typedef unsigned long long orc_uint64;
#define ORC_UINT64_C(x) (x##ULL)
#else
typedef long orc_int64;
typedef unsigned long orc_uint64;
#define ORC_UINT64_C(x) (x##UL)
#endif
#endif
/* Unions giving integer, float and sub-element views of the same storage. */
typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16;
typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32;
typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64;
#endif
/*
 * ORC_RESTRICT: spelling of the no-alias pointer qualifier for the current
 * compiler; expands to nothing when no spelling is known.
 */
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
#endif
/*
 * ORC_INTERNAL: marks a symbol as hidden (not exported) where the compiler
 * offers a way to say so; expands to nothing otherwise.
 */
#ifndef ORC_INTERNAL
#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
#define ORC_INTERNAL __attribute__((visibility("hidden")))
#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
#define ORC_INTERNAL __hidden
#elif defined (__GNUC__)
#define ORC_INTERNAL __attribute__((visibility("hidden")))
#else
#define ORC_INTERNAL
#endif
#endif
#ifndef DISABLE_ORC | |
#include <orc/orc.h> | |
#endif | |
/*
 * Prototypes for the audio_orc_* routines.
 * In the unpack implementations visible in this file, d1 is the buffer
 * written, s1 the buffer read and n the number of samples converted; the
 * pack and splat bodies are defined elsewhere -- presumably they follow the
 * same convention (p1 being the value to replicate), TODO confirm.
 */
void audio_orc_unpack_u8 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s8 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u16 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s16 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u16_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s16_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u24_32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s24_32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u24_32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s24_32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_u32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_s32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void audio_orc_unpack_f32 (gdouble * ORC_RESTRICT d1, const gfloat * ORC_RESTRICT s1, int n);
void audio_orc_unpack_f32_swap (gdouble * ORC_RESTRICT d1, const gfloat * ORC_RESTRICT s1, int n);
void audio_orc_unpack_f64 (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
void audio_orc_unpack_f64_swap (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
void audio_orc_pack_u8 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s8 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u16 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s16 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u16_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s16_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u24_32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s24_32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u24_32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s24_32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_u32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_s32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n);
void audio_orc_pack_f32 (gfloat * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
void audio_orc_pack_f32_swap (gfloat * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
void audio_orc_pack_f64 (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
void audio_orc_pack_f64_swap (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n);
void audio_orc_splat_u16 (guint16 * ORC_RESTRICT d1, int p1, int n);
void audio_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n);
void audio_orc_splat_u64 (guint64 * ORC_RESTRICT d1, int p1, int n);
/* begin Orc C target preamble */
/*
 * Helper macros. All of them are plain function-like macros, so an
 * argument may be evaluated more than once: do not pass expressions with
 * side effects.
 */
/* Generic clamp / abs / min / max. */
#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
#define ORC_ABS(a) ((a)<0 ? -(a) : (a))
#define ORC_MIN(a,b) ((a)<(b) ? (a) : (b))
#define ORC_MAX(a,b) ((a)>(b) ? (a) : (b))
/* Value ranges: S/U = signed/unsigned, B/W/L = 8/16/32 bits. */
#define ORC_SB_MAX 127
#define ORC_SB_MIN (-1-ORC_SB_MAX)
#define ORC_UB_MAX 255
#define ORC_UB_MIN 0
#define ORC_SW_MAX 32767
#define ORC_SW_MIN (-1-ORC_SW_MAX)
#define ORC_UW_MAX 65535
#define ORC_UW_MIN 0
#define ORC_SL_MAX 2147483647
#define ORC_SL_MIN (-1-ORC_SL_MAX)
#define ORC_UL_MAX 4294967295U
#define ORC_UL_MIN 0
/* Clamp to each of the ranges above. */
#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)
/* Byte-order reversal of 16-, 32- and 64-bit values. */
#define ORC_SWAP_W(x) ((((x)&0xffU)<<8) | (((x)&0xff00U)>>8))
#define ORC_SWAP_L(x) ((((x)&0xffU)<<24) | (((x)&0xff00U)<<8) | (((x)&0xff0000U)>>8) | (((x)&0xff000000U)>>24))
#define ORC_SWAP_Q(x) ((((x)&ORC_UINT64_C(0xff))<<56) | (((x)&ORC_UINT64_C(0xff00))<<40) | (((x)&ORC_UINT64_C(0xff0000))<<24) | (((x)&ORC_UINT64_C(0xff000000))<<8) | (((x)&ORC_UINT64_C(0xff00000000))>>8) | (((x)&ORC_UINT64_C(0xff0000000000))>>24) | (((x)&ORC_UINT64_C(0xff000000000000))>>40) | (((x)&ORC_UINT64_C(0xff00000000000000))>>56))
/* Advance a pointer by a byte offset; yields void *. */
#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))
/*
 * Operate on the raw bit pattern of a float (32-bit) or double (64-bit):
 *   ORC_DENORMAL*  keeps only sign and exponent when the exponent field is
 *                  zero (i.e. clears the mantissa), otherwise passes x through;
 *   ORC_ISNAN*     is true when the exponent field is all ones and the
 *                  mantissa is non-zero.
 */
#define ORC_DENORMAL(x) ((x) & ((((x)&0x7f800000) == 0) ? 0xff800000 : 0xffffffff))
#define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0))
#define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff)))
#define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0))
/* No-alias qualifier; only defined here if nothing defined it earlier. */
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
#endif
/* end Orc C target preamble */
/* audio_orc_unpack_u8 (static implementation) */ | |
void | |
audio_orc_unpack_u8 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
# 4: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
Laudio_orc_unpack_u815: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_u81 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_u82 \n\ | |
Laudio_orc_unpack_u81: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_u82: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u86 \n\ | |
Laudio_orc_unpack_u85: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadb \n\ | |
vld1.8 d6[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q3, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 d6, d6, #24 \n\ | |
# 5: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 6: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000001 \n\ | |
bne Laudio_orc_unpack_u85 \n\ | |
Laudio_orc_unpack_u86: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u812 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u811 \n\ | |
Laudio_orc_unpack_u89: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadb \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q3, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 q3, q3, #24 \n\ | |
# 5: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 6: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_u89 \n\ | |
b Laudio_orc_unpack_u812 \n\ | |
Laudio_orc_unpack_u811: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u810 \n\ | |
Laudio_orc_unpack_u88: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadb \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q3, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 q3, q3, #24 \n\ | |
# 5: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 6: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_u88 \n\ | |
b Laudio_orc_unpack_u812 \n\ | |
Laudio_orc_unpack_u810: \n\ | |
Laudio_orc_unpack_u87: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadb \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q3, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 q3, q3, #24 \n\ | |
# 5: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 6: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_u87 \n\ | |
Laudio_orc_unpack_u812: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u814 \n\ | |
Laudio_orc_unpack_u813: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadb \n\ | |
vld1.8 d6[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q3, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 d6, d6, #24 \n\ | |
# 5: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 6: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000001 \n\ | |
bne Laudio_orc_unpack_u813 \n\ | |
Laudio_orc_unpack_u814: \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_unpack_u820: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_unpack_s8 (static implementation) */ | |
void | |
audio_orc_unpack_s8 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
Laudio_orc_unpack_s815: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_s81 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_s82 \n\ | |
Laudio_orc_unpack_s81: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_s82: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s86 \n\ | |
Laudio_orc_unpack_s85: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadb \n\ | |
vld1.8 d4[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q2, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 d4, d4, #24 \n\ | |
# 4: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000001 \n\ | |
bne Laudio_orc_unpack_s85 \n\ | |
Laudio_orc_unpack_s86: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s812 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s811 \n\ | |
Laudio_orc_unpack_s89: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadb \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q2, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 q2, q2, #24 \n\ | |
# 4: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_s89 \n\ | |
b Laudio_orc_unpack_s812 \n\ | |
Laudio_orc_unpack_s811: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s810 \n\ | |
Laudio_orc_unpack_s88: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadb \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q2, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 q2, q2, #24 \n\ | |
# 4: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_s88 \n\ | |
b Laudio_orc_unpack_s812 \n\ | |
Laudio_orc_unpack_s810: \n\ | |
Laudio_orc_unpack_s87: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadb \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q2, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 q2, q2, #24 \n\ | |
# 4: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_s87 \n\ | |
Laudio_orc_unpack_s812: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s814 \n\ | |
Laudio_orc_unpack_s813: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadb \n\ | |
vld1.8 d4[0], [r3] \n\ | |
# 1: convubw \n\ | |
vmovl.u8 q2, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 d4, d4, #24 \n\ | |
# 4: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000001 \n\ | |
bne Laudio_orc_unpack_s813 \n\ | |
Laudio_orc_unpack_s814: \n\ | |
nop \n\ | |
Laudio_orc_unpack_s820: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_unpack_u16 (static implementation) */ | |
void | |
audio_orc_unpack_u16 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
# 3: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
Laudio_orc_unpack_u1615: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_u161 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_u162 \n\ | |
Laudio_orc_unpack_u161: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_u162: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u166 \n\ | |
Laudio_orc_unpack_u165: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d6[0], [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 2: shll \n\ | |
vshl.i32 d6, d6, #16 \n\ | |
# 4: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 5: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne Laudio_orc_unpack_u165 \n\ | |
Laudio_orc_unpack_u166: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u1612 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u1611 \n\ | |
Laudio_orc_unpack_u169: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d6, [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 2: shll \n\ | |
vshl.i32 q3, q3, #16 \n\ | |
# 4: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_u169 \n\ | |
b Laudio_orc_unpack_u1612 \n\ | |
Laudio_orc_unpack_u1611: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u1610 \n\ | |
Laudio_orc_unpack_u168: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d6, [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 2: shll \n\ | |
vshl.i32 q3, q3, #16 \n\ | |
# 4: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_u168 \n\ | |
b Laudio_orc_unpack_u1612 \n\ | |
Laudio_orc_unpack_u1610: \n\ | |
Laudio_orc_unpack_u167: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d6, [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 2: shll \n\ | |
vshl.i32 q3, q3, #16 \n\ | |
# 4: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_u167 \n\ | |
Laudio_orc_unpack_u1612: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u1614 \n\ | |
Laudio_orc_unpack_u1613: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d6[0], [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 2: shll \n\ | |
vshl.i32 d6, d6, #16 \n\ | |
# 4: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 5: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne Laudio_orc_unpack_u1613 \n\ | |
Laudio_orc_unpack_u1614: \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_unpack_u1620: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_unpack_s16 (static implementation) */ | |
void | |
audio_orc_unpack_s16 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
Laudio_orc_unpack_s1615: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_unpack_s164 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s1612 \n\ | |
Laudio_orc_unpack_s163: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #16 \n\ | |
# 3: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_s163 \n\ | |
b Laudio_orc_unpack_s1612 \n\ | |
Laudio_orc_unpack_s164: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_s161 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_s162 \n\ | |
Laudio_orc_unpack_s161: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_s162: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s166 \n\ | |
Laudio_orc_unpack_s165: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d4[0], [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 d4, d4, #16 \n\ | |
# 3: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne Laudio_orc_unpack_s165 \n\ | |
Laudio_orc_unpack_s166: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s1612 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s1611 \n\ | |
Laudio_orc_unpack_s169: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #16 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_s169 \n\ | |
b Laudio_orc_unpack_s1612 \n\ | |
Laudio_orc_unpack_s1611: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s1610 \n\ | |
Laudio_orc_unpack_s168: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #16 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_s168 \n\ | |
b Laudio_orc_unpack_s1612 \n\ | |
Laudio_orc_unpack_s1610: \n\ | |
Laudio_orc_unpack_s167: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #16 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_s167 \n\ | |
Laudio_orc_unpack_s1612: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s1614 \n\ | |
Laudio_orc_unpack_s1613: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d4[0], [r3] \n\ | |
# 1: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 d4, d4, #16 \n\ | |
# 3: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne Laudio_orc_unpack_s1613 \n\ | |
Laudio_orc_unpack_s1614: \n\ | |
nop \n\ | |
Laudio_orc_unpack_s1620: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_unpack_u16_swap (static implementation) */ | |
void | |
audio_orc_unpack_u16_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
# 4: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
Laudio_orc_unpack_u16_swap15: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_u16_swap1 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_u16_swap2 \n\ | |
Laudio_orc_unpack_u16_swap1: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_u16_swap2: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u16_swap6 \n\ | |
Laudio_orc_unpack_u16_swap5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d6[0], [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 d6, d6, #16 \n\ | |
# 5: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 6: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne Laudio_orc_unpack_u16_swap5 \n\ | |
Laudio_orc_unpack_u16_swap6: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u16_swap12 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u16_swap11 \n\ | |
Laudio_orc_unpack_u16_swap9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d6, [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 q3, q3, #16 \n\ | |
# 5: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 6: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_u16_swap9 \n\ | |
b Laudio_orc_unpack_u16_swap12 \n\ | |
Laudio_orc_unpack_u16_swap11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u16_swap10 \n\ | |
Laudio_orc_unpack_u16_swap8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d6, [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 q3, q3, #16 \n\ | |
# 5: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 6: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_u16_swap8 \n\ | |
b Laudio_orc_unpack_u16_swap12 \n\ | |
Laudio_orc_unpack_u16_swap10: \n\ | |
Laudio_orc_unpack_u16_swap7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d6, [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 q3, q3, #16 \n\ | |
# 5: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 6: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_u16_swap7 \n\ | |
Laudio_orc_unpack_u16_swap12: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u16_swap14 \n\ | |
Laudio_orc_unpack_u16_swap13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d6[0], [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q3, d6 \n\ | |
# 3: shll \n\ | |
vshl.i32 d6, d6, #16 \n\ | |
# 5: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 6: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne Laudio_orc_unpack_u16_swap13 \n\ | |
Laudio_orc_unpack_u16_swap14: \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_unpack_u16_swap20: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_unpack_s16_swap (static implementation) */ | |
void | |
audio_orc_unpack_s16_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
Laudio_orc_unpack_s16_swap15: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_s16_swap1 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_s16_swap2 \n\ | |
Laudio_orc_unpack_s16_swap1: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_s16_swap2: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s16_swap6 \n\ | |
Laudio_orc_unpack_s16_swap5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d4[0], [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 d4, d4, #16 \n\ | |
# 4: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne Laudio_orc_unpack_s16_swap5 \n\ | |
Laudio_orc_unpack_s16_swap6: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s16_swap12 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s16_swap11 \n\ | |
Laudio_orc_unpack_s16_swap9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 q2, q2, #16 \n\ | |
# 4: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_s16_swap9 \n\ | |
b Laudio_orc_unpack_s16_swap12 \n\ | |
Laudio_orc_unpack_s16_swap11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s16_swap10 \n\ | |
Laudio_orc_unpack_s16_swap8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 q2, q2, #16 \n\ | |
# 4: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_s16_swap8 \n\ | |
b Laudio_orc_unpack_s16_swap12 \n\ | |
Laudio_orc_unpack_s16_swap10: \n\ | |
Laudio_orc_unpack_s16_swap7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadw \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 q2, q2, #16 \n\ | |
# 4: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_s16_swap7 \n\ | |
Laudio_orc_unpack_s16_swap12: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s16_swap14 \n\ | |
Laudio_orc_unpack_s16_swap13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadw \n\ | |
vld1.16 d4[0], [r3] \n\ | |
# 1: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 2: convuwl \n\ | |
vmovl.u16 q2, d4 \n\ | |
# 3: shll \n\ | |
vshl.i32 d4, d4, #16 \n\ | |
# 4: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000002 \n\ | |
bne Laudio_orc_unpack_s16_swap13 \n\ | |
Laudio_orc_unpack_s16_swap14: \n\ | |
nop \n\ | |
Laudio_orc_unpack_s16_swap20: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_unpack_u24_32 (static implementation) */ | |
void | |
audio_orc_unpack_u24_32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
# 2: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
Laudio_orc_unpack_u24_3215: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_u24_321 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_u24_322 \n\ | |
Laudio_orc_unpack_u24_321: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_u24_322: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u24_326 \n\ | |
Laudio_orc_unpack_u24_325: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 d6, d6, #8 \n\ | |
# 3: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 4: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_u24_325 \n\ | |
Laudio_orc_unpack_u24_326: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u24_3212 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u24_3211 \n\ | |
Laudio_orc_unpack_u24_329: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 q3, q3, #8 \n\ | |
# 3: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_u24_329 \n\ | |
b Laudio_orc_unpack_u24_3212 \n\ | |
Laudio_orc_unpack_u24_3211: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u24_3210 \n\ | |
Laudio_orc_unpack_u24_328: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 q3, q3, #8 \n\ | |
# 3: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_u24_328 \n\ | |
b Laudio_orc_unpack_u24_3212 \n\ | |
Laudio_orc_unpack_u24_3210: \n\ | |
Laudio_orc_unpack_u24_327: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 q3, q3, #8 \n\ | |
# 3: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_u24_327 \n\ | |
Laudio_orc_unpack_u24_3212: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u24_3214 \n\ | |
Laudio_orc_unpack_u24_3213: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 d6, d6, #8 \n\ | |
# 3: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 4: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_u24_3213 \n\ | |
Laudio_orc_unpack_u24_3214: \n\ | |
Laudio_orc_unpack_u24_3220: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_unpack_s24_32 (static implementation) */ | |
void | |
audio_orc_unpack_s24_32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
Laudio_orc_unpack_s24_3215: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_unpack_s24_324 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s24_3212 \n\ | |
Laudio_orc_unpack_s24_323: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s24_323 \n\ | |
b Laudio_orc_unpack_s24_3212 \n\ | |
Laudio_orc_unpack_s24_324: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_s24_321 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_s24_322 \n\ | |
Laudio_orc_unpack_s24_321: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_s24_322: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s24_326 \n\ | |
Laudio_orc_unpack_s24_325: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 d4, d4, #8 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_s24_325 \n\ | |
Laudio_orc_unpack_s24_326: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s24_3212 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s24_3211 \n\ | |
Laudio_orc_unpack_s24_329: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s24_329 \n\ | |
b Laudio_orc_unpack_s24_3212 \n\ | |
Laudio_orc_unpack_s24_3211: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s24_3210 \n\ | |
Laudio_orc_unpack_s24_328: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s24_328 \n\ | |
b Laudio_orc_unpack_s24_3212 \n\ | |
Laudio_orc_unpack_s24_3210: \n\ | |
Laudio_orc_unpack_s24_327: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s24_327 \n\ | |
Laudio_orc_unpack_s24_3212: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s24_3214 \n\ | |
Laudio_orc_unpack_s24_3213: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shll \n\ | |
vshl.i32 d4, d4, #8 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_s24_3213 \n\ | |
Laudio_orc_unpack_s24_3214: \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_unpack_s24_3220: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_unpack_u24_32_swap (static implementation) */ | |
void | |
audio_orc_unpack_u24_32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
# 3: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
Laudio_orc_unpack_u24_32_swap15: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_u24_32_swap1 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_u24_32_swap2 \n\ | |
Laudio_orc_unpack_u24_32_swap1: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_u24_32_swap2: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u24_32_swap6 \n\ | |
Laudio_orc_unpack_u24_32_swap5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d6, d6 \n\ | |
# 2: shll \n\ | |
vshl.i32 d6, d6, #8 \n\ | |
# 4: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 5: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_u24_32_swap5 \n\ | |
Laudio_orc_unpack_u24_32_swap6: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u24_32_swap12 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u24_32_swap11 \n\ | |
Laudio_orc_unpack_u24_32_swap9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 2: shll \n\ | |
vshl.i32 q3, q3, #8 \n\ | |
# 4: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_u24_32_swap9 \n\ | |
b Laudio_orc_unpack_u24_32_swap12 \n\ | |
Laudio_orc_unpack_u24_32_swap11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u24_32_swap10 \n\ | |
Laudio_orc_unpack_u24_32_swap8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 2: shll \n\ | |
vshl.i32 q3, q3, #8 \n\ | |
# 4: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_u24_32_swap8 \n\ | |
b Laudio_orc_unpack_u24_32_swap12 \n\ | |
Laudio_orc_unpack_u24_32_swap10: \n\ | |
Laudio_orc_unpack_u24_32_swap7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 2: shll \n\ | |
vshl.i32 q3, q3, #8 \n\ | |
# 4: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_u24_32_swap7 \n\ | |
Laudio_orc_unpack_u24_32_swap12: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u24_32_swap14 \n\ | |
Laudio_orc_unpack_u24_32_swap13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d6, d6 \n\ | |
# 2: shll \n\ | |
vshl.i32 d6, d6, #8 \n\ | |
# 4: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 5: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_u24_32_swap13 \n\ | |
Laudio_orc_unpack_u24_32_swap14: \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_unpack_u24_32_swap20: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_unpack_s24_32_swap (static implementation) */ | |
void | |
audio_orc_unpack_s24_32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
Laudio_orc_unpack_s24_32_swap15: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_unpack_s24_32_swap4 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s24_32_swap12 \n\ | |
Laudio_orc_unpack_s24_32_swap3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 3: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s24_32_swap3 \n\ | |
b Laudio_orc_unpack_s24_32_swap12 \n\ | |
Laudio_orc_unpack_s24_32_swap4: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_s24_32_swap1 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_s24_32_swap2 \n\ | |
Laudio_orc_unpack_s24_32_swap1: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_s24_32_swap2: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s24_32_swap6 \n\ | |
Laudio_orc_unpack_s24_32_swap5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 d4, d4, #8 \n\ | |
# 3: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_s24_32_swap5 \n\ | |
Laudio_orc_unpack_s24_32_swap6: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s24_32_swap12 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s24_32_swap11 \n\ | |
Laudio_orc_unpack_s24_32_swap9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s24_32_swap9 \n\ | |
b Laudio_orc_unpack_s24_32_swap12 \n\ | |
Laudio_orc_unpack_s24_32_swap11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s24_32_swap10 \n\ | |
Laudio_orc_unpack_s24_32_swap8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s24_32_swap8 \n\ | |
b Laudio_orc_unpack_s24_32_swap12 \n\ | |
Laudio_orc_unpack_s24_32_swap10: \n\ | |
Laudio_orc_unpack_s24_32_swap7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: shll \n\ | |
vshl.i32 q2, q2, #8 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s24_32_swap7 \n\ | |
Laudio_orc_unpack_s24_32_swap12: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s24_32_swap14 \n\ | |
Laudio_orc_unpack_s24_32_swap13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 2: shll \n\ | |
vshl.i32 d4, d4, #8 \n\ | |
# 3: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_s24_32_swap13 \n\ | |
Laudio_orc_unpack_s24_32_swap14: \n\ | |
nop \n\ | |
Laudio_orc_unpack_s24_32_swap20: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_unpack_u32 (static implementation) */ | |
void | |
audio_orc_unpack_u32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
Laudio_orc_unpack_u3215: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_unpack_u324 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u3212 \n\ | |
Laudio_orc_unpack_u323: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.8 { d6, d7 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_u323 \n\ | |
b Laudio_orc_unpack_u3212 \n\ | |
Laudio_orc_unpack_u324: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_u321 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_u322 \n\ | |
Laudio_orc_unpack_u321: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_u322: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u326 \n\ | |
Laudio_orc_unpack_u325: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_u325 \n\ | |
Laudio_orc_unpack_u326: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u3212 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u3211 \n\ | |
Laudio_orc_unpack_u329: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_u329 \n\ | |
b Laudio_orc_unpack_u3212 \n\ | |
Laudio_orc_unpack_u3211: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u3210 \n\ | |
Laudio_orc_unpack_u328: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_u328 \n\ | |
b Laudio_orc_unpack_u3212 \n\ | |
Laudio_orc_unpack_u3210: \n\ | |
Laudio_orc_unpack_u327: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_u327 \n\ | |
Laudio_orc_unpack_u3212: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u3214 \n\ | |
Laudio_orc_unpack_u3213: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_u3213 \n\ | |
Laudio_orc_unpack_u3214: \n\ | |
nop \n\ | |
Laudio_orc_unpack_u3220: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_unpack_u32_swap (static implementation) */ | |
void | |
audio_orc_unpack_u32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
# 2: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
Laudio_orc_unpack_u32_swap15: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_u32_swap1 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_u32_swap2 \n\ | |
Laudio_orc_unpack_u32_swap1: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_u32_swap2: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u32_swap6 \n\ | |
Laudio_orc_unpack_u32_swap5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d6, d6 \n\ | |
# 3: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 4: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_u32_swap5 \n\ | |
Laudio_orc_unpack_u32_swap6: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u32_swap12 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u32_swap11 \n\ | |
Laudio_orc_unpack_u32_swap9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 3: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_u32_swap9 \n\ | |
b Laudio_orc_unpack_u32_swap12 \n\ | |
Laudio_orc_unpack_u32_swap11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u32_swap10 \n\ | |
Laudio_orc_unpack_u32_swap8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 3: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_u32_swap8 \n\ | |
b Laudio_orc_unpack_u32_swap12 \n\ | |
Laudio_orc_unpack_u32_swap10: \n\ | |
Laudio_orc_unpack_u32_swap7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 3: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_u32_swap7 \n\ | |
Laudio_orc_unpack_u32_swap12: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_u32_swap14 \n\ | |
Laudio_orc_unpack_u32_swap13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d6, d6 \n\ | |
# 3: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 4: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_u32_swap13 \n\ | |
Laudio_orc_unpack_u32_swap14: \n\ | |
Laudio_orc_unpack_u32_swap20: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_unpack_s32 (static implementation) */ | |
void | |
audio_orc_unpack_s32 (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
Laudio_orc_unpack_s3215: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_unpack_s324 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s3212 \n\ | |
Laudio_orc_unpack_s323: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s323 \n\ | |
b Laudio_orc_unpack_s3212 \n\ | |
Laudio_orc_unpack_s324: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_s321 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_s322 \n\ | |
Laudio_orc_unpack_s321: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_s322: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s326 \n\ | |
Laudio_orc_unpack_s325: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_s325 \n\ | |
Laudio_orc_unpack_s326: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s3212 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s3211 \n\ | |
Laudio_orc_unpack_s329: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s329 \n\ | |
b Laudio_orc_unpack_s3212 \n\ | |
Laudio_orc_unpack_s3211: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s3210 \n\ | |
Laudio_orc_unpack_s328: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s328 \n\ | |
b Laudio_orc_unpack_s3212 \n\ | |
Laudio_orc_unpack_s3210: \n\ | |
Laudio_orc_unpack_s327: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s327 \n\ | |
Laudio_orc_unpack_s3212: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s3214 \n\ | |
Laudio_orc_unpack_s3213: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_s3213 \n\ | |
Laudio_orc_unpack_s3214: \n\ | |
nop \n\ | |
Laudio_orc_unpack_s3220: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_unpack_s32_swap (static implementation) */ | |
void | |
audio_orc_unpack_s32_swap (gint32 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
Laudio_orc_unpack_s32_swap15: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_unpack_s32_swap4 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s32_swap12 \n\ | |
Laudio_orc_unpack_s32_swap3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s32_swap3 \n\ | |
b Laudio_orc_unpack_s32_swap12 \n\ | |
Laudio_orc_unpack_s32_swap4: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_s32_swap1 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_s32_swap2 \n\ | |
Laudio_orc_unpack_s32_swap1: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_s32_swap2: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s32_swap6 \n\ | |
Laudio_orc_unpack_s32_swap5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_s32_swap5 \n\ | |
Laudio_orc_unpack_s32_swap6: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s32_swap12 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s32_swap11 \n\ | |
Laudio_orc_unpack_s32_swap9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s32_swap9 \n\ | |
b Laudio_orc_unpack_s32_swap12 \n\ | |
Laudio_orc_unpack_s32_swap11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s32_swap10 \n\ | |
Laudio_orc_unpack_s32_swap8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s32_swap8 \n\ | |
b Laudio_orc_unpack_s32_swap12 \n\ | |
Laudio_orc_unpack_s32_swap10: \n\ | |
Laudio_orc_unpack_s32_swap7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_s32_swap7 \n\ | |
Laudio_orc_unpack_s32_swap12: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_s32_swap14 \n\ | |
Laudio_orc_unpack_s32_swap13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_unpack_s32_swap13 \n\ | |
Laudio_orc_unpack_s32_swap14: \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_unpack_s32_swap20: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_unpack_f32 (static implementation) */ | |
void | |
audio_orc_unpack_f32 (gdouble * ORC_RESTRICT d1, const gfloat * ORC_RESTRICT s1, int n){ | |
int i; | |
orc_union64 * ORC_RESTRICT ptr0; | |
const orc_union32 * ORC_RESTRICT ptr4; | |
orc_union32 var32; | |
orc_union64 var33; | |
ptr0 = (orc_union64 *)d1; | |
ptr4 = (orc_union32 *)s1; | |
for (i = 0; i < n; i++) { | |
/* 0: loadl */ | |
var32 = ptr4[i]; | |
/* 1: convfd */ | |
{ | |
orc_union32 _src1; | |
_src1.i = ORC_DENORMAL(var32.i); | |
var33.f = _src1.f; | |
} | |
/* 2: storeq */ | |
ptr0[i] = var33; | |
} | |
} | |
/* audio_orc_unpack_f32_swap (static implementation) */ | |
void | |
audio_orc_unpack_f32_swap (gdouble * ORC_RESTRICT d1, const gfloat * ORC_RESTRICT s1, int n){ | |
int i; | |
orc_union64 * ORC_RESTRICT ptr0; | |
const orc_union32 * ORC_RESTRICT ptr4; | |
orc_union32 var33; | |
orc_union64 var34; | |
orc_union32 var35; | |
ptr0 = (orc_union64 *)d1; | |
ptr4 = (orc_union32 *)s1; | |
for (i = 0; i < n; i++) { | |
/* 0: loadl */ | |
var33 = ptr4[i]; | |
/* 1: swapl */ | |
var35.i = ORC_SWAP_L(var33.i); | |
/* 2: convfd */ | |
{ | |
orc_union32 _src1; | |
_src1.i = ORC_DENORMAL(var35.i); | |
var34.f = _src1.f; | |
} | |
/* 3: storeq */ | |
ptr0[i] = var34; | |
} | |
} | |
/* audio_orc_unpack_f64 (static implementation) */ | |
void | |
audio_orc_unpack_f64 (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 1; | |
uint32_t counter3 = counter2 & ((1<<1)-1); | |
asm(" \n\ | |
Laudio_orc_unpack_f6415: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_unpack_f644 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_f6412 \n\ | |
Laudio_orc_unpack_f643: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_f643 \n\ | |
b Laudio_orc_unpack_f6412 \n\ | |
Laudio_orc_unpack_f644: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #3 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_f641 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #1 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #1 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_f642 \n\ | |
Laudio_orc_unpack_f641: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_f642: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_f646 \n\ | |
Laudio_orc_unpack_f645: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_f645 \n\ | |
Laudio_orc_unpack_f646: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_f6412 \n\ | |
asr r1, ip, #19 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_f6411 \n\ | |
Laudio_orc_unpack_f649: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_f649 \n\ | |
b Laudio_orc_unpack_f6412 \n\ | |
Laudio_orc_unpack_f6411: \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_f6410 \n\ | |
Laudio_orc_unpack_f648: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_f648 \n\ | |
b Laudio_orc_unpack_f6412 \n\ | |
Laudio_orc_unpack_f6410: \n\ | |
Laudio_orc_unpack_f647: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_f647 \n\ | |
Laudio_orc_unpack_f6412: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_f6414 \n\ | |
Laudio_orc_unpack_f6413: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_f6413 \n\ | |
Laudio_orc_unpack_f6414: \n\ | |
nop \n\ | |
Laudio_orc_unpack_f6420: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_unpack_f64_swap (static implementation) */ | |
void | |
audio_orc_unpack_f64_swap (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 1; | |
uint32_t counter3 = counter2 & ((1<<1)-1); | |
asm(" \n\ | |
Laudio_orc_unpack_f64_swap15: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_unpack_f64_swap4 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_f64_swap12 \n\ | |
Laudio_orc_unpack_f64_swap3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_f64_swap3 \n\ | |
b Laudio_orc_unpack_f64_swap12 \n\ | |
Laudio_orc_unpack_f64_swap4: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #3 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_unpack_f64_swap1 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #1 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #1 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_unpack_f64_swap2 \n\ | |
Laudio_orc_unpack_f64_swap1: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_unpack_f64_swap2: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_f64_swap6 \n\ | |
Laudio_orc_unpack_f64_swap5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 d4, d4 \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_f64_swap5 \n\ | |
Laudio_orc_unpack_f64_swap6: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_f64_swap12 \n\ | |
asr r1, ip, #19 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_f64_swap11 \n\ | |
Laudio_orc_unpack_f64_swap9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_f64_swap9 \n\ | |
b Laudio_orc_unpack_f64_swap12 \n\ | |
Laudio_orc_unpack_f64_swap11: \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_unpack_f64_swap10 \n\ | |
Laudio_orc_unpack_f64_swap8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_f64_swap8 \n\ | |
b Laudio_orc_unpack_f64_swap12 \n\ | |
Laudio_orc_unpack_f64_swap10: \n\ | |
Laudio_orc_unpack_f64_swap7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_unpack_f64_swap7 \n\ | |
Laudio_orc_unpack_f64_swap12: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_unpack_f64_swap14 \n\ | |
Laudio_orc_unpack_f64_swap13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 d4, d4 \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_unpack_f64_swap13 \n\ | |
Laudio_orc_unpack_f64_swap14: \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_unpack_f64_swap20: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_u8 (static implementation) */ | |
void | |
audio_orc_pack_u8 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
Laudio_orc_pack_u815: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_u81 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_u82 \n\ | |
Laudio_orc_pack_u81: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_u82: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u86 \n\ | |
Laudio_orc_pack_u85: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #24 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: convwb \n\ | |
vmovn.i16 d6, q3 \n\ | |
# 6: storeb \n\ | |
vst1.8 d6[0], [r2] \n\ | |
add r2, r2, #0x00000001 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_u85 \n\ | |
Laudio_orc_pack_u86: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u812 \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_u811 \n\ | |
Laudio_orc_pack_u89: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #24 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: convwb \n\ | |
vmovn.i16 d6, q3 \n\ | |
# 6: storeb \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u89 \n\ | |
b Laudio_orc_pack_u812 \n\ | |
Laudio_orc_pack_u811: \n\ | |
asr r1, ip, #11 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_u810 \n\ | |
Laudio_orc_pack_u88: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #24 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: convwb \n\ | |
vmovn.i16 d6, q3 \n\ | |
# 6: storeb \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u88 \n\ | |
b Laudio_orc_pack_u812 \n\ | |
Laudio_orc_pack_u810: \n\ | |
Laudio_orc_pack_u87: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #24 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: convwb \n\ | |
vmovn.i16 d6, q3 \n\ | |
# 6: storeb \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u87 \n\ | |
Laudio_orc_pack_u812: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u814 \n\ | |
Laudio_orc_pack_u813: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #24 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: convwb \n\ | |
vmovn.i16 d6, q3 \n\ | |
# 6: storeb \n\ | |
vst1.8 d6[0], [r2] \n\ | |
add r2, r2, #0x00000001 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_u813 \n\ | |
Laudio_orc_pack_u814: \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_pack_u820: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_s8 (static implementation) */ | |
void | |
audio_orc_pack_s8 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
Laudio_orc_pack_s815: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_s81 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_s82 \n\ | |
Laudio_orc_pack_s81: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_s82: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s86 \n\ | |
Laudio_orc_pack_s85: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #24 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: convwb \n\ | |
vmovn.i16 d4, q2 \n\ | |
# 4: storeb \n\ | |
vst1.8 d4[0], [r2] \n\ | |
add r2, r2, #0x00000001 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_s85 \n\ | |
Laudio_orc_pack_s86: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s812 \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_s811 \n\ | |
Laudio_orc_pack_s89: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #24 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: convwb \n\ | |
vmovn.i16 d4, q2 \n\ | |
# 4: storeb \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s89 \n\ | |
b Laudio_orc_pack_s812 \n\ | |
Laudio_orc_pack_s811: \n\ | |
asr r1, ip, #11 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_s810 \n\ | |
Laudio_orc_pack_s88: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #24 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: convwb \n\ | |
vmovn.i16 d4, q2 \n\ | |
# 4: storeb \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s88 \n\ | |
b Laudio_orc_pack_s812 \n\ | |
Laudio_orc_pack_s810: \n\ | |
Laudio_orc_pack_s87: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #24 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: convwb \n\ | |
vmovn.i16 d4, q2 \n\ | |
# 4: storeb \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s87 \n\ | |
Laudio_orc_pack_s812: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s814 \n\ | |
Laudio_orc_pack_s813: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #24 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: convwb \n\ | |
vmovn.i16 d4, q2 \n\ | |
# 4: storeb \n\ | |
vst1.8 d4[0], [r2] \n\ | |
add r2, r2, #0x00000001 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_s813 \n\ | |
Laudio_orc_pack_s814: \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_pack_s820: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_u16 (static implementation) */ | |
void | |
audio_orc_pack_u16 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
Laudio_orc_pack_u1615: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #1 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_u161 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_u162 \n\ | |
Laudio_orc_pack_u161: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_u162: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u166 \n\ | |
Laudio_orc_pack_u165: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: storew \n\ | |
vst1.16 d6[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_u165 \n\ | |
Laudio_orc_pack_u166: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u1612 \n\ | |
asr r1, ip, #16 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_u1611 \n\ | |
Laudio_orc_pack_u169: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: storew \n\ | |
vst1.64 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u169 \n\ | |
b Laudio_orc_pack_u1612 \n\ | |
Laudio_orc_pack_u1611: \n\ | |
asr r1, ip, #12 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_u1610 \n\ | |
Laudio_orc_pack_u168: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: storew \n\ | |
vst1.64 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u168 \n\ | |
b Laudio_orc_pack_u1612 \n\ | |
Laudio_orc_pack_u1610: \n\ | |
Laudio_orc_pack_u167: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: storew \n\ | |
vst1.64 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u167 \n\ | |
Laudio_orc_pack_u1612: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u1614 \n\ | |
Laudio_orc_pack_u1613: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: storew \n\ | |
vst1.16 d6[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_u1613 \n\ | |
Laudio_orc_pack_u1614: \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_pack_u1620: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_s16 (static implementation) */ | |
void | |
audio_orc_pack_s16 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
Laudio_orc_pack_s1615: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_pack_s164 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s1612 \n\ | |
Laudio_orc_pack_s163: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: storew \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s163 \n\ | |
b Laudio_orc_pack_s1612 \n\ | |
Laudio_orc_pack_s164: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #1 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_s161 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_s162 \n\ | |
Laudio_orc_pack_s161: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_s162: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s166 \n\ | |
Laudio_orc_pack_s165: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: storew \n\ | |
vst1.16 d4[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_s165 \n\ | |
Laudio_orc_pack_s166: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s1612 \n\ | |
asr r1, ip, #16 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_s1611 \n\ | |
Laudio_orc_pack_s169: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: storew \n\ | |
vst1.64 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s169 \n\ | |
b Laudio_orc_pack_s1612 \n\ | |
Laudio_orc_pack_s1611: \n\ | |
asr r1, ip, #12 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_s1610 \n\ | |
Laudio_orc_pack_s168: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: storew \n\ | |
vst1.64 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s168 \n\ | |
b Laudio_orc_pack_s1612 \n\ | |
Laudio_orc_pack_s1610: \n\ | |
Laudio_orc_pack_s167: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: storew \n\ | |
vst1.64 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s167 \n\ | |
Laudio_orc_pack_s1612: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s1614 \n\ | |
Laudio_orc_pack_s1613: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: storew \n\ | |
vst1.16 d4[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_s1613 \n\ | |
Laudio_orc_pack_s1614: \n\ | |
nop \n\ | |
Laudio_orc_pack_s1620: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_u16_swap (static implementation) */ | |
void | |
audio_orc_pack_u16_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
Laudio_orc_pack_u16_swap15: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #1 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_u16_swap1 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_u16_swap2 \n\ | |
Laudio_orc_pack_u16_swap1: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_u16_swap2: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u16_swap6 \n\ | |
Laudio_orc_pack_u16_swap5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 6: storew \n\ | |
vst1.16 d6[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_u16_swap5 \n\ | |
Laudio_orc_pack_u16_swap6: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u16_swap12 \n\ | |
asr r1, ip, #16 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_u16_swap11 \n\ | |
Laudio_orc_pack_u16_swap9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 6: storew \n\ | |
vst1.64 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u16_swap9 \n\ | |
b Laudio_orc_pack_u16_swap12 \n\ | |
Laudio_orc_pack_u16_swap11: \n\ | |
asr r1, ip, #12 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_u16_swap10 \n\ | |
Laudio_orc_pack_u16_swap8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 6: storew \n\ | |
vst1.64 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u16_swap8 \n\ | |
b Laudio_orc_pack_u16_swap12 \n\ | |
Laudio_orc_pack_u16_swap10: \n\ | |
Laudio_orc_pack_u16_swap7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 6: storew \n\ | |
vst1.64 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u16_swap7 \n\ | |
Laudio_orc_pack_u16_swap12: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u16_swap14 \n\ | |
Laudio_orc_pack_u16_swap13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #16 \n\ | |
# 4: convlw \n\ | |
vmovn.i32 d6, q3 \n\ | |
# 5: swapw \n\ | |
vrev16.i8 d6, d6 \n\ | |
# 6: storew \n\ | |
vst1.16 d6[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_u16_swap13 \n\ | |
Laudio_orc_pack_u16_swap14: \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_pack_u16_swap20: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_s16_swap (static implementation) */ | |
void | |
audio_orc_pack_s16_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
Laudio_orc_pack_s16_swap15: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #1 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_s16_swap1 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_s16_swap2 \n\ | |
Laudio_orc_pack_s16_swap1: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_s16_swap2: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s16_swap6 \n\ | |
Laudio_orc_pack_s16_swap5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 4: storew \n\ | |
vst1.16 d4[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_s16_swap5 \n\ | |
Laudio_orc_pack_s16_swap6: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s16_swap12 \n\ | |
asr r1, ip, #16 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_s16_swap11 \n\ | |
Laudio_orc_pack_s16_swap9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 4: storew \n\ | |
vst1.64 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s16_swap9 \n\ | |
b Laudio_orc_pack_s16_swap12 \n\ | |
Laudio_orc_pack_s16_swap11: \n\ | |
asr r1, ip, #12 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_s16_swap10 \n\ | |
Laudio_orc_pack_s16_swap8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 4: storew \n\ | |
vst1.64 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s16_swap8 \n\ | |
b Laudio_orc_pack_s16_swap12 \n\ | |
Laudio_orc_pack_s16_swap10: \n\ | |
Laudio_orc_pack_s16_swap7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 4: storew \n\ | |
vst1.64 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s16_swap7 \n\ | |
Laudio_orc_pack_s16_swap12: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s16_swap14 \n\ | |
Laudio_orc_pack_s16_swap13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #16 \n\ | |
# 2: convlw \n\ | |
vmovn.i32 d4, q2 \n\ | |
# 3: swapw \n\ | |
vrev16.i8 d4, d4 \n\ | |
# 4: storew \n\ | |
vst1.16 d4[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_s16_swap13 \n\ | |
Laudio_orc_pack_s16_swap14: \n\ | |
nop \n\ | |
Laudio_orc_pack_s16_swap20: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_u24_32 (static implementation) */ | |
void | |
audio_orc_pack_u24_32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
Laudio_orc_pack_u24_3215: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_u24_321 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_u24_322 \n\ | |
Laudio_orc_pack_u24_321: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_u24_322: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u24_326 \n\ | |
Laudio_orc_pack_u24_325: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #8 \n\ | |
# 4: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_u24_325 \n\ | |
Laudio_orc_pack_u24_326: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u24_3212 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_u24_3211 \n\ | |
Laudio_orc_pack_u24_329: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #8 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u24_329 \n\ | |
b Laudio_orc_pack_u24_3212 \n\ | |
Laudio_orc_pack_u24_3211: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_u24_3210 \n\ | |
Laudio_orc_pack_u24_328: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #8 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u24_328 \n\ | |
b Laudio_orc_pack_u24_3212 \n\ | |
Laudio_orc_pack_u24_3210: \n\ | |
Laudio_orc_pack_u24_327: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #8 \n\ | |
# 4: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u24_327 \n\ | |
Laudio_orc_pack_u24_3212: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u24_3214 \n\ | |
Laudio_orc_pack_u24_3213: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #8 \n\ | |
# 4: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_u24_3213 \n\ | |
Laudio_orc_pack_u24_3214: \n\ | |
Laudio_orc_pack_u24_3220: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_s24_32 (static implementation) */ | |
void | |
audio_orc_pack_s24_32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
Laudio_orc_pack_s24_3215: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_pack_s24_324 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s24_3212 \n\ | |
Laudio_orc_pack_s24_323: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s24_323 \n\ | |
b Laudio_orc_pack_s24_3212 \n\ | |
Laudio_orc_pack_s24_324: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_s24_321 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_s24_322 \n\ | |
Laudio_orc_pack_s24_321: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_s24_322: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s24_326 \n\ | |
Laudio_orc_pack_s24_325: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #8 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_s24_325 \n\ | |
Laudio_orc_pack_s24_326: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s24_3212 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_s24_3211 \n\ | |
Laudio_orc_pack_s24_329: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s24_329 \n\ | |
b Laudio_orc_pack_s24_3212 \n\ | |
Laudio_orc_pack_s24_3211: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_s24_3210 \n\ | |
Laudio_orc_pack_s24_328: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s24_328 \n\ | |
b Laudio_orc_pack_s24_3212 \n\ | |
Laudio_orc_pack_s24_3210: \n\ | |
Laudio_orc_pack_s24_327: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s24_327 \n\ | |
Laudio_orc_pack_s24_3212: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s24_3214 \n\ | |
Laudio_orc_pack_s24_3213: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #8 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_s24_3213 \n\ | |
Laudio_orc_pack_s24_3214: \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_pack_s24_3220: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_u24_32_swap (static implementation) */ | |
void | |
audio_orc_pack_u24_32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
Laudio_orc_pack_u24_32_swap15: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_u24_32_swap1 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_u24_32_swap2 \n\ | |
Laudio_orc_pack_u24_32_swap1: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_u24_32_swap2: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u24_32_swap6 \n\ | |
Laudio_orc_pack_u24_32_swap5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #8 \n\ | |
# 4: swapl \n\ | |
vrev32.i8 d6, d6 \n\ | |
# 5: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_u24_32_swap5 \n\ | |
Laudio_orc_pack_u24_32_swap6: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u24_32_swap12 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_u24_32_swap11 \n\ | |
Laudio_orc_pack_u24_32_swap9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #8 \n\ | |
# 4: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u24_32_swap9 \n\ | |
b Laudio_orc_pack_u24_32_swap12 \n\ | |
Laudio_orc_pack_u24_32_swap11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_u24_32_swap10 \n\ | |
Laudio_orc_pack_u24_32_swap8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #8 \n\ | |
# 4: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u24_32_swap8 \n\ | |
b Laudio_orc_pack_u24_32_swap12 \n\ | |
Laudio_orc_pack_u24_32_swap10: \n\ | |
Laudio_orc_pack_u24_32_swap7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: shrul \n\ | |
vshr.u32 q3, q3, #8 \n\ | |
# 4: swapl \n\ | |
vrev32.i8 q3, q3 \n\ | |
# 5: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u24_32_swap7 \n\ | |
Laudio_orc_pack_u24_32_swap12: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u24_32_swap14 \n\ | |
Laudio_orc_pack_u24_32_swap13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: shrul \n\ | |
vshr.u32 d6, d6, #8 \n\ | |
# 4: swapl \n\ | |
vrev32.i8 d6, d6 \n\ | |
# 5: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_u24_32_swap13 \n\ | |
Laudio_orc_pack_u24_32_swap14: \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_pack_u24_32_swap20: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_s24_32_swap (static implementation) */ | |
void | |
audio_orc_pack_s24_32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
Laudio_orc_pack_s24_32_swap15: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_pack_s24_32_swap4 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s24_32_swap12 \n\ | |
Laudio_orc_pack_s24_32_swap3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 3: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s24_32_swap3 \n\ | |
b Laudio_orc_pack_s24_32_swap12 \n\ | |
Laudio_orc_pack_s24_32_swap4: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_s24_32_swap1 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_s24_32_swap2 \n\ | |
Laudio_orc_pack_s24_32_swap1: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_s24_32_swap2: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s24_32_swap6 \n\ | |
Laudio_orc_pack_s24_32_swap5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #8 \n\ | |
# 2: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_s24_32_swap5 \n\ | |
Laudio_orc_pack_s24_32_swap6: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s24_32_swap12 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_s24_32_swap11 \n\ | |
Laudio_orc_pack_s24_32_swap9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s24_32_swap9 \n\ | |
b Laudio_orc_pack_s24_32_swap12 \n\ | |
Laudio_orc_pack_s24_32_swap11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_s24_32_swap10 \n\ | |
Laudio_orc_pack_s24_32_swap8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s24_32_swap8 \n\ | |
b Laudio_orc_pack_s24_32_swap12 \n\ | |
Laudio_orc_pack_s24_32_swap10: \n\ | |
Laudio_orc_pack_s24_32_swap7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 q2, q2, #8 \n\ | |
# 2: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s24_32_swap7 \n\ | |
Laudio_orc_pack_s24_32_swap12: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s24_32_swap14 \n\ | |
Laudio_orc_pack_s24_32_swap13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: shrsl \n\ | |
vshr.s32 d4, d4, #8 \n\ | |
# 2: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_s24_32_swap13 \n\ | |
Laudio_orc_pack_s24_32_swap14: \n\ | |
nop \n\ | |
Laudio_orc_pack_s24_32_swap20: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_u32 (static implementation) */ | |
void | |
audio_orc_pack_u32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
Laudio_orc_pack_u3215: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_pack_u324 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u3212 \n\ | |
Laudio_orc_pack_u323: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.8 { d6, d7 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u323 \n\ | |
b Laudio_orc_pack_u3212 \n\ | |
Laudio_orc_pack_u324: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_u321 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_u322 \n\ | |
Laudio_orc_pack_u321: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_u322: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u326 \n\ | |
Laudio_orc_pack_u325: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_u325 \n\ | |
Laudio_orc_pack_u326: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u3212 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_u3211 \n\ | |
Laudio_orc_pack_u329: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u329 \n\ | |
b Laudio_orc_pack_u3212 \n\ | |
Laudio_orc_pack_u3211: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_u3210 \n\ | |
Laudio_orc_pack_u328: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u328 \n\ | |
b Laudio_orc_pack_u3212 \n\ | |
Laudio_orc_pack_u3210: \n\ | |
Laudio_orc_pack_u327: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u327 \n\ | |
Laudio_orc_pack_u3212: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u3214 \n\ | |
Laudio_orc_pack_u3213: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_u3213 \n\ | |
Laudio_orc_pack_u3214: \n\ | |
nop \n\ | |
Laudio_orc_pack_u3220: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_s32 (static implementation) */ | |
void | |
audio_orc_pack_s32 (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
Laudio_orc_pack_s3215: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_pack_s324 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s3212 \n\ | |
Laudio_orc_pack_s323: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s323 \n\ | |
b Laudio_orc_pack_s3212 \n\ | |
Laudio_orc_pack_s324: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_s321 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_s322 \n\ | |
Laudio_orc_pack_s321: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_s322: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s326 \n\ | |
Laudio_orc_pack_s325: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_s325 \n\ | |
Laudio_orc_pack_s326: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s3212 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_s3211 \n\ | |
Laudio_orc_pack_s329: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s329 \n\ | |
b Laudio_orc_pack_s3212 \n\ | |
Laudio_orc_pack_s3211: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_s3210 \n\ | |
Laudio_orc_pack_s328: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s328 \n\ | |
b Laudio_orc_pack_s3212 \n\ | |
Laudio_orc_pack_s3210: \n\ | |
Laudio_orc_pack_s327: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s327 \n\ | |
Laudio_orc_pack_s3212: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s3214 \n\ | |
Laudio_orc_pack_s3213: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: copyl \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_s3213 \n\ | |
Laudio_orc_pack_s3214: \n\ | |
nop \n\ | |
Laudio_orc_pack_s3220: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_u32_swap (static implementation) */ | |
void | |
audio_orc_pack_u32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
# 1: loadpl \n\ | |
vmov.i32 q2, #0x00000000 \n\ | |
vorr.i32 q2, #0x80000000 \n\ | |
Laudio_orc_pack_u32_swap15: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_pack_u32_swap4 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u32_swap12 \n\ | |
Laudio_orc_pack_u32_swap3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.8 { d6, d7 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u32_swap3 \n\ | |
b Laudio_orc_pack_u32_swap12 \n\ | |
Laudio_orc_pack_u32_swap4: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_u32_swap1 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_u32_swap2 \n\ | |
Laudio_orc_pack_u32_swap1: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_u32_swap2: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u32_swap6 \n\ | |
Laudio_orc_pack_u32_swap5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_u32_swap5 \n\ | |
Laudio_orc_pack_u32_swap6: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u32_swap12 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_u32_swap11 \n\ | |
Laudio_orc_pack_u32_swap9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u32_swap9 \n\ | |
b Laudio_orc_pack_u32_swap12 \n\ | |
Laudio_orc_pack_u32_swap11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_u32_swap10 \n\ | |
Laudio_orc_pack_u32_swap8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u32_swap8 \n\ | |
b Laudio_orc_pack_u32_swap12 \n\ | |
Laudio_orc_pack_u32_swap10: \n\ | |
Laudio_orc_pack_u32_swap7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d6, d7 }, [r3] \n\ | |
# 2: xorl \n\ | |
veor q3, q3, q2 \n\ | |
# 3: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_u32_swap7 \n\ | |
Laudio_orc_pack_u32_swap12: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_u32_swap14 \n\ | |
Laudio_orc_pack_u32_swap13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d6[0], [r3] \n\ | |
# 2: xorl \n\ | |
veor d6, d6, d4 \n\ | |
# 3: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_u32_swap13 \n\ | |
Laudio_orc_pack_u32_swap14: \n\ | |
nop \n\ | |
Laudio_orc_pack_u32_swap20: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_s32_swap (static implementation) */ | |
void | |
audio_orc_pack_s32_swap (guint8 * ORC_RESTRICT d1, const gint32 * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
Laudio_orc_pack_s32_swap15: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_pack_s32_swap4 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s32_swap12 \n\ | |
Laudio_orc_pack_s32_swap3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s32_swap3 \n\ | |
b Laudio_orc_pack_s32_swap12 \n\ | |
Laudio_orc_pack_s32_swap4: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_s32_swap1 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_s32_swap2 \n\ | |
Laudio_orc_pack_s32_swap1: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_s32_swap2: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s32_swap6 \n\ | |
Laudio_orc_pack_s32_swap5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_s32_swap5 \n\ | |
Laudio_orc_pack_s32_swap6: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s32_swap12 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_s32_swap11 \n\ | |
Laudio_orc_pack_s32_swap9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s32_swap9 \n\ | |
b Laudio_orc_pack_s32_swap12 \n\ | |
Laudio_orc_pack_s32_swap11: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_s32_swap10 \n\ | |
Laudio_orc_pack_s32_swap8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s32_swap8 \n\ | |
b Laudio_orc_pack_s32_swap12 \n\ | |
Laudio_orc_pack_s32_swap10: \n\ | |
Laudio_orc_pack_s32_swap7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 0: loadl \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_s32_swap7 \n\ | |
Laudio_orc_pack_s32_swap12: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_s32_swap14 \n\ | |
Laudio_orc_pack_s32_swap13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadl \n\ | |
vld1.32 d4[0], [r3] \n\ | |
# 1: swapl \n\ | |
vrev32.i8 d4, d4 \n\ | |
# 2: storel \n\ | |
vst1.32 d4[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
add r3, r3, #0x00000004 \n\ | |
bne Laudio_orc_pack_s32_swap13 \n\ | |
Laudio_orc_pack_s32_swap14: \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_pack_s32_swap20: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_f32 (static implementation) */ | |
void | |
audio_orc_pack_f32 (gfloat * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n){ | |
int i; | |
orc_union32 * ORC_RESTRICT ptr0; | |
const orc_union64 * ORC_RESTRICT ptr4; | |
orc_union64 var32; | |
orc_union32 var33; | |
ptr0 = (orc_union32 *)d1; | |
ptr4 = (orc_union64 *)s1; | |
for (i = 0; i < n; i++) { | |
/* 0: loadq */ | |
var32 = ptr4[i]; | |
/* 1: convdf */ | |
{ | |
orc_union64 _src1; | |
orc_union32 _dest; | |
_src1.i = ORC_DENORMAL_DOUBLE(var32.i); | |
_dest.f = _src1.f; | |
var33.i = ORC_DENORMAL(_dest.i); | |
} | |
/* 2: storel */ | |
ptr0[i] = var33; | |
} | |
} | |
/* audio_orc_pack_f32_swap (static implementation) */ | |
void | |
audio_orc_pack_f32_swap (gfloat * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n){ | |
int i; | |
orc_union32 * ORC_RESTRICT ptr0; | |
const orc_union64 * ORC_RESTRICT ptr4; | |
orc_union64 var33; | |
orc_union32 var34; | |
orc_union32 var35; | |
ptr0 = (orc_union32 *)d1; | |
ptr4 = (orc_union64 *)s1; | |
for (i = 0; i < n; i++) { | |
/* 0: loadq */ | |
var33 = ptr4[i]; | |
/* 1: convdf */ | |
{ | |
orc_union64 _src1; | |
orc_union32 _dest; | |
_src1.i = ORC_DENORMAL_DOUBLE(var33.i); | |
_dest.f = _src1.f; | |
var35.i = ORC_DENORMAL(_dest.i); | |
} | |
/* 2: swapl */ | |
var34.i = ORC_SWAP_L(var35.i); | |
/* 3: storel */ | |
ptr0[i] = var34; | |
} | |
} | |
/* audio_orc_pack_f64 (static implementation) */ | |
void | |
audio_orc_pack_f64 (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 1; | |
uint32_t counter3 = counter2 & ((1<<1)-1); | |
asm(" \n\ | |
Laudio_orc_pack_f6415: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_pack_f644 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_f6412 \n\ | |
Laudio_orc_pack_f643: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_f643 \n\ | |
b Laudio_orc_pack_f6412 \n\ | |
Laudio_orc_pack_f644: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #3 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_f641 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #1 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #1 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_f642 \n\ | |
Laudio_orc_pack_f641: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_f642: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_f646 \n\ | |
Laudio_orc_pack_f645: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_pack_f645 \n\ | |
Laudio_orc_pack_f646: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_f6412 \n\ | |
asr r1, ip, #19 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_f6411 \n\ | |
Laudio_orc_pack_f649: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_f649 \n\ | |
b Laudio_orc_pack_f6412 \n\ | |
Laudio_orc_pack_f6411: \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_f6410 \n\ | |
Laudio_orc_pack_f648: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_f648 \n\ | |
b Laudio_orc_pack_f6412 \n\ | |
Laudio_orc_pack_f6410: \n\ | |
Laudio_orc_pack_f647: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_f647 \n\ | |
Laudio_orc_pack_f6412: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_f6414 \n\ | |
Laudio_orc_pack_f6413: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: copyq \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_pack_f6413 \n\ | |
Laudio_orc_pack_f6414: \n\ | |
nop \n\ | |
Laudio_orc_pack_f6420: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_pack_f64_swap (static implementation) */ | |
void | |
audio_orc_pack_f64_swap (gdouble * ORC_RESTRICT d1, const gdouble * ORC_RESTRICT s1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 1; | |
uint32_t counter3 = counter2 & ((1<<1)-1); | |
asm(" \n\ | |
Laudio_orc_pack_f64_swap15: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_pack_f64_swap4 \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_f64_swap12 \n\ | |
Laudio_orc_pack_f64_swap3: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.8 { d4, d5 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_f64_swap3 \n\ | |
b Laudio_orc_pack_f64_swap12 \n\ | |
Laudio_orc_pack_f64_swap4: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #3 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_pack_f64_swap1 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #1 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #1 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_pack_f64_swap2 \n\ | |
Laudio_orc_pack_f64_swap1: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_pack_f64_swap2: \n\ | |
mov r2, %[d1] \n\ | |
mov r3, %[s1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_f64_swap6 \n\ | |
Laudio_orc_pack_f64_swap5: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 d4, d4 \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_pack_f64_swap5 \n\ | |
Laudio_orc_pack_f64_swap6: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_f64_swap12 \n\ | |
asr r1, ip, #19 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_f64_swap11 \n\ | |
Laudio_orc_pack_f64_swap9: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_f64_swap9 \n\ | |
b Laudio_orc_pack_f64_swap12 \n\ | |
Laudio_orc_pack_f64_swap11: \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_pack_f64_swap10 \n\ | |
Laudio_orc_pack_f64_swap8: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_f64_swap8 \n\ | |
b Laudio_orc_pack_f64_swap12 \n\ | |
Laudio_orc_pack_f64_swap10: \n\ | |
Laudio_orc_pack_f64_swap7: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 0: loadq \n\ | |
vld1.8 { d4, d5 }, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d4, d5 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
add r3, r3, #0x00000010 \n\ | |
bne Laudio_orc_pack_f64_swap7 \n\ | |
Laudio_orc_pack_f64_swap12: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_pack_f64_swap14 \n\ | |
Laudio_orc_pack_f64_swap13: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 0: loadq \n\ | |
vld1.8 d4, [r3] \n\ | |
# 1: swapq \n\ | |
vrev64.i8 d4, d4 \n\ | |
# 2: storeq \n\ | |
vst1.8 d4, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
add r3, r3, #0x00000008 \n\ | |
bne Laudio_orc_pack_f64_swap13 \n\ | |
Laudio_orc_pack_f64_swap14: \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_pack_f64_swap20: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[s1] "+rm" (s1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_splat_u16 (static implementation) */ | |
void | |
audio_orc_splat_u16 (guint16 * ORC_RESTRICT d1, int p1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 3; | |
uint32_t counter3 = counter2 & ((1<<3)-1); | |
asm(" \n\ | |
# 0: loadpw \n\ | |
add r1, r0, #0x00000278 \n\ | |
vld1.16 {d4[],d5[]}, [r1] \n\ | |
Laudio_orc_splat_u1615: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_splat_u164 \n\ | |
mov r2, %[d1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_splat_u1612 \n\ | |
Laudio_orc_splat_u163: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 3 \n\ | |
# 1: copyw \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storew \n\ | |
vst1.8 { d6, d7 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne Laudio_orc_splat_u163 \n\ | |
b Laudio_orc_splat_u1612 \n\ | |
Laudio_orc_splat_u164: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #1 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_splat_u161 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #3 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #7 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_splat_u162 \n\ | |
Laudio_orc_splat_u161: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_splat_u162: \n\ | |
mov r2, %[d1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_splat_u166 \n\ | |
Laudio_orc_splat_u165: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 1: copyw \n\ | |
vorr d6, d4, d4 \n\ | |
# 2: storew \n\ | |
vst1.16 d6[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
bne Laudio_orc_splat_u165 \n\ | |
Laudio_orc_splat_u166: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_splat_u1612 \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_splat_u1611 \n\ | |
Laudio_orc_splat_u169: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 3 \n\ | |
# 1: copyw \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storew \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne Laudio_orc_splat_u169 \n\ | |
b Laudio_orc_splat_u1612 \n\ | |
Laudio_orc_splat_u1611: \n\ | |
asr r1, ip, #11 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_splat_u1610 \n\ | |
Laudio_orc_splat_u168: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 3 \n\ | |
# 1: copyw \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storew \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne Laudio_orc_splat_u168 \n\ | |
b Laudio_orc_splat_u1612 \n\ | |
Laudio_orc_splat_u1610: \n\ | |
Laudio_orc_splat_u167: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 3 \n\ | |
# 1: copyw \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storew \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne Laudio_orc_splat_u167 \n\ | |
Laudio_orc_splat_u1612: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_splat_u1614 \n\ | |
Laudio_orc_splat_u1613: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 1: copyw \n\ | |
vorr d6, d4, d4 \n\ | |
# 2: storew \n\ | |
vst1.16 d6[0], [r2] \n\ | |
add r2, r2, #0x00000002 \n\ | |
bne Laudio_orc_splat_u1613 \n\ | |
Laudio_orc_splat_u1614: \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_splat_u1620: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_splat_u32 (static implementation) */ | |
void | |
audio_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 2; | |
uint32_t counter3 = counter2 & ((1<<2)-1); | |
asm(" \n\ | |
# 0: loadpl \n\ | |
add r1, r0, #0x00000278 \n\ | |
vld1.32 {d4[],d5[]}, [r1] \n\ | |
Laudio_orc_splat_u3215: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_splat_u324 \n\ | |
mov r2, %[d1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_splat_u3212 \n\ | |
Laudio_orc_splat_u323: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 1: copyl \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.8 { d6, d7 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne Laudio_orc_splat_u323 \n\ | |
b Laudio_orc_splat_u3212 \n\ | |
Laudio_orc_splat_u324: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #2 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_splat_u321 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #2 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #3 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_splat_u322 \n\ | |
Laudio_orc_splat_u321: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_splat_u322: \n\ | |
mov r2, %[d1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_splat_u326 \n\ | |
Laudio_orc_splat_u325: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 1: copyl \n\ | |
vorr d6, d4, d4 \n\ | |
# 2: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
bne Laudio_orc_splat_u325 \n\ | |
Laudio_orc_splat_u326: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_splat_u3212 \n\ | |
asr r1, ip, #17 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_splat_u3211 \n\ | |
Laudio_orc_splat_u329: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 1: copyl \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne Laudio_orc_splat_u329 \n\ | |
b Laudio_orc_splat_u3212 \n\ | |
Laudio_orc_splat_u3211: \n\ | |
asr r1, ip, #13 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_splat_u3210 \n\ | |
Laudio_orc_splat_u328: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 1: copyl \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne Laudio_orc_splat_u328 \n\ | |
b Laudio_orc_splat_u3212 \n\ | |
Laudio_orc_splat_u3210: \n\ | |
Laudio_orc_splat_u327: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 2 \n\ | |
# 1: copyl \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storel \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne Laudio_orc_splat_u327 \n\ | |
Laudio_orc_splat_u3212: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_splat_u3214 \n\ | |
Laudio_orc_splat_u3213: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 1: copyl \n\ | |
vorr d6, d4, d4 \n\ | |
# 2: storel \n\ | |
vst1.32 d6[0], [r2] \n\ | |
add r2, r2, #0x00000004 \n\ | |
bne Laudio_orc_splat_u3213 \n\ | |
Laudio_orc_splat_u3214: \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_splat_u3220: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
/* audio_orc_splat_u64 (static implementation) */ | |
void | |
audio_orc_splat_u64 (guint64 * ORC_RESTRICT d1, int p1, int n) { | |
uint32_t counter1 = 0; | |
uint32_t counter2 = 64 >> 1; | |
uint32_t counter3 = counter2 & ((1<<1)-1); | |
asm(" \n\ | |
# 0: loadpq \n\ | |
add r1, r0, #0x00000278 \n\ | |
vld1.32 d4[0], [r1] \n\ | |
vld1.32 d5[0], [r1] \n\ | |
add r1, r0, #0x00000298 \n\ | |
vld1.32 d4[1], [r1] \n\ | |
vld1.32 d5[1], [r1] \n\ | |
Laudio_orc_splat_u6415: \n\ | |
mov r2, %[counter2] \n\ | |
cmp r2, #0x00000040 \n\ | |
bgt Laudio_orc_splat_u644 \n\ | |
mov r2, %[d1] \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_splat_u6412 \n\ | |
Laudio_orc_splat_u643: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 1: copyq \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.8 { d6, d7 }, [r2] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne Laudio_orc_splat_u643 \n\ | |
b Laudio_orc_splat_u6412 \n\ | |
Laudio_orc_splat_u644: \n\ | |
mov ip, #0x00000010 \n\ | |
mov r1, %[d1] \n\ | |
sub ip, ip, r1 \n\ | |
and ip, ip, #15 \n\ | |
asr ip, ip, #3 \n\ | |
mov r2, %[n] \n\ | |
cmp r2, ip \n\ | |
ble Laudio_orc_splat_u641 \n\ | |
str ip, %[counter1] \n\ | |
sub r1, r2, ip \n\ | |
asr r2, r1, #1 \n\ | |
str r2, %[counter2] \n\ | |
and r2, r1, #1 \n\ | |
str r2, %[counter3] \n\ | |
b Laudio_orc_splat_u642 \n\ | |
Laudio_orc_splat_u641: \n\ | |
str r2, %[counter1] \n\ | |
mov r2, #0x00000000 \n\ | |
str r2, %[counter2] \n\ | |
str r2, %[counter3] \n\ | |
Laudio_orc_splat_u642: \n\ | |
mov r2, %[d1] \n\ | |
mov ip, %[counter1] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_splat_u646 \n\ | |
Laudio_orc_splat_u645: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 1: copyq \n\ | |
vorr d6, d4, d4 \n\ | |
# 2: storeq \n\ | |
vst1.8 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
bne Laudio_orc_splat_u645 \n\ | |
Laudio_orc_splat_u646: \n\ | |
mov ip, %[counter2] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_splat_u6412 \n\ | |
asr r1, ip, #19 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_splat_u6411 \n\ | |
Laudio_orc_splat_u649: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 1: copyq \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne Laudio_orc_splat_u649 \n\ | |
b Laudio_orc_splat_u6412 \n\ | |
Laudio_orc_splat_u6411: \n\ | |
asr r1, ip, #15 \n\ | |
cmp r1, #0x00000000 \n\ | |
beq Laudio_orc_splat_u6410 \n\ | |
Laudio_orc_splat_u648: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 1: copyq \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne Laudio_orc_splat_u648 \n\ | |
b Laudio_orc_splat_u6412 \n\ | |
Laudio_orc_splat_u6410: \n\ | |
Laudio_orc_splat_u647: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 1 \n\ | |
# 1: copyq \n\ | |
vorr q3, q2, q2 \n\ | |
# 2: storeq \n\ | |
vst1.64 { d6, d7 }, [r2,:128] \n\ | |
add r2, r2, #0x00000010 \n\ | |
bne Laudio_orc_splat_u647 \n\ | |
Laudio_orc_splat_u6412: \n\ | |
mov ip, %[counter3] \n\ | |
cmp ip, #0x00000000 \n\ | |
beq Laudio_orc_splat_u6414 \n\ | |
Laudio_orc_splat_u6413: \n\ | |
subs ip, ip, #0x00000001 \n\ | |
# LOOP shift 0 \n\ | |
# 1: copyq \n\ | |
vorr d6, d4, d4 \n\ | |
# 2: storeq \n\ | |
vst1.8 d6, [r2] \n\ | |
add r2, r2, #0x00000008 \n\ | |
bne Laudio_orc_splat_u6413 \n\ | |
Laudio_orc_splat_u6414: \n\ | |
nop \n\ | |
nop \n\ | |
nop \n\ | |
Laudio_orc_splat_u6420: \n\ | |
.word 0x07060706 \n\ | |
.word 0x07060706 \n\ | |
.word 0x0f0e0f0e \n\ | |
.word 0x0f0e0f0e \n\ | |
" | |
: | |
[d1] "+rm" (d1), | |
[counter1] "+r" (counter1), | |
[counter2] "+r" (counter2), | |
[counter3] "+r" (counter3) | |
: | |
[n] "r" (n) | |
); | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment